diff --git a/.github/workflows/airflow-plugin.yml b/.github/workflows/airflow-plugin.yml index 21fd547114872..a9be56a69560c 100644 --- a/.github/workflows/airflow-plugin.yml +++ b/.github/workflows/airflow-plugin.yml @@ -51,6 +51,9 @@ jobs: - python-version: "3.10" extra_pip_requirements: 'apache-airflow==2.8.1 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.8.1/constraints-3.10.txt' extra_pip_extras: plugin-v2 + - python-version: "3.10" + extra_pip_requirements: 'apache-airflow==2.9.0 -c https://raw.githubusercontent.com/apache/airflow/constraints-2.9.0/constraints-3.10.txt' + extra_pip_extras: plugin-v2 fail-fast: false steps: - name: Set up JDK 17 diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 3d1d4090e4fbd..fe50ad67c6492 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -6,6 +6,11 @@ on: pull_request: branches: - "**" + types: + - labeled + - opened + - synchronize + - reopened release: types: [published] @@ -125,7 +130,8 @@ jobs: if: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }} run: | python ./.github/scripts/check_python_package.py - ./gradlew :smoke-test:lint + ./gradlew :smoke-test:pythonLint + ./gradlew :smoke-test:cypressLint gms_build: name: Build and Push DataHub GMS Docker Image diff --git a/README.md b/README.md index 0db06d29eac78..15289f663f7a8 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ HOSTED_DOCS_ONLY--> [![PyPI version](https://badge.fury.io/py/acryl-datahub.svg)](https://badge.fury.io/py/acryl-datahub) [![build & test](https://github.com/datahub-project/datahub/workflows/build%20&%20test/badge.svg?branch=master&event=push)](https://github.com/datahub-project/datahub/actions?query=workflow%3A%22build+%26+test%22+branch%3Amaster+event%3Apush) [![Docker Pulls](https://img.shields.io/docker/pulls/acryldata/datahub-gms.svg)](https://hub.docker.com/r/acryldata/datahub-gms) 
-[![Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link) +[![Slack](https://img.shields.io/badge/slack-join_chat-white.svg?logo=slack&style=social)](https://datahubproject.io/slack?utm_source=github&utm_medium=readme&utm_campaign=github_readme) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/datahub-project/datahub/blob/master/docs/CONTRIBUTING.md) [![GitHub commit activity](https://img.shields.io/github/commit-activity/m/datahub-project/datahub)](https://github.com/datahub-project/datahub/pulls?q=is%3Apr) [![License](https://img.shields.io/github/license/datahub-project/datahub)](https://github.com/datahub-project/datahub/blob/master/LICENSE) @@ -106,7 +106,7 @@ We welcome contributions from the community. Please refer to our [Contributing G ## Community -Join our [Slack workspace](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link) for discussions and important announcements. You can also find out more about our upcoming [town hall meetings](docs/townhalls.md) and view past recordings. +Join our [Slack workspace](https://datahubproject.io/slack?utm_source=github&utm_medium=readme&utm_campaign=github_readme) for discussions and important announcements. You can also find out more about our upcoming [town hall meetings](docs/townhalls.md) and view past recordings. 
## Adoption diff --git a/build.gradle b/build.gradle index b61140999a82f..5264c1c58313c 100644 --- a/build.gradle +++ b/build.gradle @@ -54,7 +54,7 @@ buildscript { ext.hazelcastVersion = '5.3.6' ext.ebeanVersion = '12.16.1' ext.googleJavaFormatVersion = '1.18.1' - ext.openLineageVersion = '1.5.0' + ext.openLineageVersion = '1.13.1' ext.logbackClassicJava8 = '1.2.12' ext.docker_registry = 'acryldata' @@ -168,7 +168,8 @@ project.ext.externalDependency = [ 'jettison': 'org.codehaus.jettison:jettison:1.5.4', 'jgrapht': 'org.jgrapht:jgrapht-core:1.5.1', 'jna': 'net.java.dev.jna:jna:5.12.1', - 'jsonPatch': 'com.github.java-json-tools:json-patch:1.13', + 'jsonPatch': 'jakarta.json:jakarta.json-api:2.1.3', + 'jsonPathImpl': 'org.eclipse.parsson:parsson:1.1.6', 'jsonSimple': 'com.googlecode.json-simple:json-simple:1.1.1', 'jsonSmart': 'net.minidev:json-smart:2.4.9', 'json': 'org.json:json:20231013', @@ -271,7 +272,7 @@ project.ext.externalDependency = [ 'jetbrains':' org.jetbrains.kotlin:kotlin-stdlib:1.6.0', 'annotationApi': 'javax.annotation:javax.annotation-api:1.3.2', 'jakartaAnnotationApi': 'jakarta.annotation:jakarta.annotation-api:3.0.0', - 'classGraph': 'io.github.classgraph:classgraph:4.8.168', + 'classGraph': 'io.github.classgraph:classgraph:4.8.172', ] allprojects { diff --git a/datahub-frontend/app/auth/AuthModule.java b/datahub-frontend/app/auth/AuthModule.java index 39357e7da12a7..7db8f5689ead5 100644 --- a/datahub-frontend/app/auth/AuthModule.java +++ b/datahub-frontend/app/auth/AuthModule.java @@ -62,6 +62,7 @@ public class AuthModule extends AbstractModule { private static final String PAC4J_SESSIONSTORE_PROVIDER_CONF = "pac4j.sessionStore.provider"; private static final String ENTITY_CLIENT_RETRY_INTERVAL = "entityClient.retryInterval"; private static final String ENTITY_CLIENT_NUM_RETRIES = "entityClient.numRetries"; + private static final String ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE = "entityClient.restli.get.batchSize"; private static final String 
GET_SSO_SETTINGS_ENDPOINT = "auth/getSsoSettings"; private final com.typesafe.config.Config _configs; @@ -201,11 +202,13 @@ protected ConfigurationProvider provideConfigurationProvider() { protected SystemEntityClient provideEntityClient( @Named("systemOperationContext") final OperationContext systemOperationContext, final ConfigurationProvider configurationProvider) { + return new SystemRestliEntityClient( buildRestliClient(), new ExponentialBackoff(_configs.getInt(ENTITY_CLIENT_RETRY_INTERVAL)), _configs.getInt(ENTITY_CLIENT_NUM_RETRIES), - configurationProvider.getCache().getClient().getEntityClient()); + configurationProvider.getCache().getClient().getEntityClient(), + Math.max(1, _configs.getInt(ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE))); } @Provides diff --git a/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java b/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java index 22f7ca20ab8b8..510804ba17f1a 100644 --- a/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java +++ b/datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java @@ -211,6 +211,8 @@ private Result handleOidcCallback( "Failed to perform post authentication steps. Error message: %s", e.getMessage())); } + log.info("OIDC callback authentication successful for user: {}", userName); + // Successfully logged in - Generate GMS login token final String accessToken = authClient.generateSessionTokenForUser(corpUserUrn.getId()); return result diff --git a/datahub-frontend/app/client/AuthServiceClient.java b/datahub-frontend/app/client/AuthServiceClient.java index baa992994d8ba..30f841d10b4bf 100644 --- a/datahub-frontend/app/client/AuthServiceClient.java +++ b/datahub-frontend/app/client/AuthServiceClient.java @@ -75,7 +75,6 @@ public String generateSessionTokenForUser(@Nonnull final String userId) { CloseableHttpResponse response = null; try { - final String protocol = this.metadataServiceUseSsl ? 
"https" : "http"; final HttpPost request = new HttpPost( @@ -86,6 +85,8 @@ public String generateSessionTokenForUser(@Nonnull final String userId) { this.metadataServicePort, GENERATE_SESSION_TOKEN_ENDPOINT)); + log.info("Requesting session token for user: {}", userId); + // Build JSON request to generate a token on behalf of a user. final ObjectMapper objectMapper = new ObjectMapper(); final ObjectNode objectNode = objectMapper.createObjectNode(); @@ -100,7 +101,7 @@ public String generateSessionTokenForUser(@Nonnull final String userId) { response = httpClient.execute(request); final HttpEntity entity = response.getEntity(); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK && entity != null) { - // Successfully generated a token for the User + log.info("Successfully received session token for user: {}", userId); final String jsonStr = EntityUtils.toString(entity); return getAccessTokenFromJson(jsonStr); } else { @@ -110,6 +111,7 @@ public String generateSessionTokenForUser(@Nonnull final String userId) { response.getStatusLine().toString(), response.getEntity().toString())); } } catch (Exception e) { + log.error("Failed to generate session token for user: {}", userId, e); throw new RuntimeException("Failed to generate session token for user", e); } finally { try { diff --git a/datahub-frontend/app/controllers/AuthenticationController.java b/datahub-frontend/app/controllers/AuthenticationController.java index d9568c25f6e8c..87c4b5ba06793 100644 --- a/datahub-frontend/app/controllers/AuthenticationController.java +++ b/datahub-frontend/app/controllers/AuthenticationController.java @@ -42,7 +42,6 @@ import play.mvc.Results; import security.AuthenticationManager; -// TODO add logging. 
public class AuthenticationController extends Controller { public static final String AUTH_VERBOSE_LOGGING = "auth.verbose.logging"; private static final String AUTH_REDIRECT_URI_PARAM = "redirect_uri"; @@ -183,10 +182,12 @@ public Result logIn(Http.Request request) { boolean loginSucceeded = tryLogin(username, password); if (!loginSucceeded) { + _logger.info("Login failed for user: {}", username); return Results.badRequest(invalidCredsJson); } final Urn actorUrn = new CorpuserUrn(username); + _logger.info("Login successful for user: {}, urn: {}", username, actorUrn); final String accessToken = _authClient.generateSessionTokenForUser(actorUrn.getId()); return createSession(actorUrn.toString(), accessToken); } @@ -250,6 +251,7 @@ public Result signUp(Http.Request request) { final Urn userUrn = new CorpuserUrn(email); final String userUrnString = userUrn.toString(); _authClient.signUp(userUrnString, fullName, email, title, password, inviteToken); + _logger.info("Signed up user {} using invite tokens", userUrnString); final String accessToken = _authClient.generateSessionTokenForUser(userUrn.getId()); return createSession(userUrnString, accessToken); } @@ -351,15 +353,15 @@ private boolean tryLogin(String username, String password) { // First try jaas login, if enabled if (_jaasConfigs.isJAASEnabled()) { try { - _logger.debug("Attempting jaas authentication"); + _logger.debug("Attempting JAAS authentication for user: {}", username); AuthenticationManager.authenticateJaasUser(username, password); - _logger.debug("Jaas authentication successful. Login succeeded"); + _logger.debug("JAAS authentication successful. Login succeeded"); loginSucceeded = true; } catch (Exception e) { if (_verbose) { - _logger.debug("Jaas authentication error. Login failed", e); + _logger.debug("JAAS authentication error. Login failed", e); } else { - _logger.debug("Jaas authentication error. Login failed"); + _logger.debug("JAAS authentication error. 
Login failed"); } } } diff --git a/datahub-frontend/conf/application.conf b/datahub-frontend/conf/application.conf index 0f4ddb7c497e6..6aa58d5b13b2c 100644 --- a/datahub-frontend/conf/application.conf +++ b/datahub-frontend/conf/application.conf @@ -288,4 +288,6 @@ systemClientSecret=${?DATAHUB_SYSTEM_CLIENT_SECRET} entityClient.retryInterval = 2 entityClient.retryInterval = ${?ENTITY_CLIENT_RETRY_INTERVAL} entityClient.numRetries = 3 -entityClient.numRetries = ${?ENTITY_CLIENT_NUM_RETRIES} \ No newline at end of file +entityClient.numRetries = ${?ENTITY_CLIENT_NUM_RETRIES} +entityClient.restli.get.batchSize = 100 +entityClient.restli.get.batchSize = ${?ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java index 5f555b45d3b09..0924dbc0c0a6d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/Constants.java @@ -21,6 +21,7 @@ private Constants() {} public static final String PROPERTIES_SCHEMA_FILE = "properties.graphql"; public static final String FORMS_SCHEMA_FILE = "forms.graphql"; public static final String INCIDENTS_SCHEMA_FILE = "incident.graphql"; + public static final String CONNECTIONS_SCHEMA_FILE = "connection.graphql"; public static final String BROWSE_PATH_DELIMITER = "/"; public static final String BROWSE_PATH_V2_DELIMITER = "␟"; public static final String VERSION_STAMP_FIELD_NAME = "versionStamp"; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index e4418eade7a4c..1fb01e9ed0d52 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -48,6 +48,7 @@ import com.linkedin.datahub.graphql.generated.DashboardStatsSummary; import com.linkedin.datahub.graphql.generated.DashboardUserUsageCounts; import com.linkedin.datahub.graphql.generated.DataFlow; +import com.linkedin.datahub.graphql.generated.DataHubConnection; import com.linkedin.datahub.graphql.generated.DataHubView; import com.linkedin.datahub.graphql.generated.DataJob; import com.linkedin.datahub.graphql.generated.DataJobInputOutput; @@ -129,6 +130,7 @@ import com.linkedin.datahub.graphql.resolvers.chart.BrowseV2Resolver; import com.linkedin.datahub.graphql.resolvers.chart.ChartStatsSummaryResolver; import com.linkedin.datahub.graphql.resolvers.config.AppConfigResolver; +import com.linkedin.datahub.graphql.resolvers.connection.UpsertConnectionResolver; import com.linkedin.datahub.graphql.resolvers.container.ContainerEntitiesResolver; import com.linkedin.datahub.graphql.resolvers.container.ParentContainersResolver; import com.linkedin.datahub.graphql.resolvers.dashboard.DashboardStatsSummaryResolver; @@ -306,6 +308,7 @@ import com.linkedin.datahub.graphql.types.chart.ChartType; import com.linkedin.datahub.graphql.types.common.mappers.OperationMapper; import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; +import com.linkedin.datahub.graphql.types.connection.DataHubConnectionType; import com.linkedin.datahub.graphql.types.container.ContainerType; import com.linkedin.datahub.graphql.types.corpgroup.CorpGroupType; import com.linkedin.datahub.graphql.types.corpuser.CorpUserType; @@ -355,6 +358,7 @@ import com.linkedin.metadata.config.ViewsConfiguration; import com.linkedin.metadata.config.VisualConfiguration; import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; +import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphClient; import 
com.linkedin.metadata.graph.SiblingGraphService; @@ -439,6 +443,7 @@ public class GmsGraphQLEngine { private final ERModelRelationshipService erModelRelationshipService; private final FormService formService; private final RestrictedService restrictedService; + private ConnectionService connectionService; private final BusinessAttributeService businessAttributeService; private final FeatureFlags featureFlags; @@ -472,6 +477,7 @@ public class GmsGraphQLEngine { private final GlossaryTermType glossaryTermType; private final GlossaryNodeType glossaryNodeType; private final AspectType aspectType; + private final DataHubConnectionType connectionType; private final ContainerType containerType; private final DomainType domainType; private final NotebookType notebookType; @@ -497,6 +503,7 @@ public class GmsGraphQLEngine { private final int graphQLQueryComplexityLimit; private final int graphQLQueryDepthLimit; + private final boolean graphQLQueryIntrospectionEnabled; private final BusinessAttributeType businessAttributeType; @@ -557,6 +564,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.dataProductService = args.dataProductService; this.formService = args.formService; this.restrictedService = args.restrictedService; + this.connectionService = args.connectionService; this.businessAttributeService = args.businessAttributeService; this.ingestionConfiguration = Objects.requireNonNull(args.ingestionConfiguration); @@ -587,6 +595,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.glossaryTermType = new GlossaryTermType(entityClient); this.glossaryNodeType = new GlossaryNodeType(entityClient); this.aspectType = new AspectType(entityClient); + this.connectionType = new DataHubConnectionType(entityClient, secretService); this.containerType = new ContainerType(entityClient); this.domainType = new DomainType(entityClient); this.notebookType = new NotebookType(entityClient); @@ -612,6 +621,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs 
args) { this.graphQLQueryComplexityLimit = args.graphQLQueryComplexityLimit; this.graphQLQueryDepthLimit = args.graphQLQueryDepthLimit; + this.graphQLQueryIntrospectionEnabled = args.graphQLQueryIntrospectionEnabled; this.businessAttributeType = new BusinessAttributeType(entityClient); // Init Lists @@ -634,6 +644,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { dataJobType, glossaryTermType, glossaryNodeType, + connectionType, containerType, notebookType, domainType, @@ -751,6 +762,7 @@ public void configureRuntimeWiring(final RuntimeWiring.Builder builder) { configureRoleResolvers(builder); configureBusinessAttributeResolver(builder); configureBusinessAttributeAssociationResolver(builder); + configureConnectionResolvers(builder); } private void configureOrganisationRoleResolvers(RuntimeWiring.Builder builder) { @@ -801,6 +813,7 @@ public GraphQLEngine.Builder builder() { .addSchema(fileBasedSchema(LINEAGE_SCHEMA_FILE)) .addSchema(fileBasedSchema(PROPERTIES_SCHEMA_FILE)) .addSchema(fileBasedSchema(FORMS_SCHEMA_FILE)) + .addSchema(fileBasedSchema(CONNECTIONS_SCHEMA_FILE)) .addSchema(fileBasedSchema(INCIDENTS_SCHEMA_FILE)); for (GmsGraphQLPlugin plugin : this.graphQLPlugins) { @@ -819,7 +832,9 @@ public GraphQLEngine.Builder builder() { .addDataLoader("Aspect", context -> createDataLoader(aspectType, context)) - .configureRuntimeWiring(this::configureRuntimeWiring) .setGraphQLQueryComplexityLimit(graphQLQueryComplexityLimit) - .setGraphQLQueryDepthLimit(graphQLQueryDepthLimit); + .setGraphQLQueryDepthLimit(graphQLQueryDepthLimit) + .setGraphQLQueryIntrospectionEnabled(graphQLQueryIntrospectionEnabled) + // Wire last: GraphQLEngine.Builder#configureRuntimeWiring reads the introspection flag. + .configureRuntimeWiring(this::configureRuntimeWiring); return builder; } @@ -3012,4 +3027,29 @@ private void configureBusinessAttributeAssociationResolver(final RuntimeWiring.B .getBusinessAttribute() .getUrn()))); } + + private void configureConnectionResolvers(final RuntimeWiring.Builder builder) { + builder.type( + "Mutation", + typeWiring -> + typeWiring.dataFetcher( + "upsertConnection", + new
UpsertConnectionResolver(connectionService, secretService))); + builder.type( + "Query", + typeWiring -> typeWiring.dataFetcher("connection", getResolver(this.connectionType))); + builder.type( + "DataHubConnection", + typeWiring -> + typeWiring.dataFetcher( + "platform", + new LoadableTypeResolver<>( + this.dataPlatformType, + (env) -> { + final DataHubConnection connection = env.getSource(); + return connection.getPlatform() != null + ? connection.getPlatform().getUrn() + : null; + }))); + } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java index 199fa15ccbe01..d4d4d592d6bca 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java @@ -19,6 +19,7 @@ import com.linkedin.metadata.config.ViewsConfiguration; import com.linkedin.metadata.config.VisualConfiguration; import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; +import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; @@ -82,7 +83,9 @@ public class GmsGraphQLEngineArgs { RestrictedService restrictedService; int graphQLQueryComplexityLimit; int graphQLQueryDepthLimit; + boolean graphQLQueryIntrospectionEnabled; BusinessAttributeService businessAttributeService; + ConnectionService connectionService; // any fork specific args should go below this line } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GraphQLEngine.java index 58b0be1d16c18..c72f82a8e1bf6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GraphQLEngine.java 
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GraphQLEngine.java @@ -16,6 +16,7 @@ import graphql.schema.idl.SchemaGenerator; import graphql.schema.idl.SchemaParser; import graphql.schema.idl.TypeDefinitionRegistry; +import graphql.schema.visibility.NoIntrospectionGraphqlFieldVisibility; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -44,15 +45,18 @@ public class GraphQLEngine { private final Map>> _dataLoaderSuppliers; private final int graphQLQueryComplexityLimit; private final int graphQLQueryDepthLimit; + private final boolean graphQLQueryIntrospectionEnabled; private GraphQLEngine( @Nonnull final List schemas, @Nonnull final RuntimeWiring runtimeWiring, @Nonnull final Map>> dataLoaderSuppliers, @Nonnull final int graphQLQueryComplexityLimit, - @Nonnull final int graphQLQueryDepthLimit) { + @Nonnull final int graphQLQueryDepthLimit, + @Nonnull final boolean graphQLQueryIntrospectionEnabled) { this.graphQLQueryComplexityLimit = graphQLQueryComplexityLimit; this.graphQLQueryDepthLimit = graphQLQueryDepthLimit; + this.graphQLQueryIntrospectionEnabled = graphQLQueryIntrospectionEnabled; _dataLoaderSuppliers = dataLoaderSuppliers; @@ -130,6 +134,7 @@ public static class Builder { private final RuntimeWiring.Builder _runtimeWiringBuilder = newRuntimeWiring(); private int graphQLQueryComplexityLimit = 2000; private int graphQLQueryDepthLimit = 50; + private boolean graphQLQueryIntrospectionEnabled = true; /** * Used to add a schema file containing the GQL types resolved by the engine. @@ -177,6 +182,9 @@ public Builder addDataLoaders( * any required data + type resolvers. 
*/ public Builder configureRuntimeWiring(final Consumer builderFunc) { + if (!this.graphQLQueryIntrospectionEnabled) + _runtimeWiringBuilder.fieldVisibility( + NoIntrospectionGraphqlFieldVisibility.NO_INTROSPECTION_FIELD_VISIBILITY); builderFunc.accept(_runtimeWiringBuilder); return this; } @@ -191,6 +199,11 @@ public Builder setGraphQLQueryDepthLimit(final int queryDepthLimit) { return this; } + public Builder setGraphQLQueryIntrospectionEnabled(final boolean introspectionEnabled) { + this.graphQLQueryIntrospectionEnabled = introspectionEnabled; + return this; + } + /** Builds a {@link GraphQLEngine}. */ public GraphQLEngine build() { return new GraphQLEngine( @@ -198,7 +211,8 @@ public GraphQLEngine build() { _runtimeWiringBuilder.build(), _loaderSuppliers, graphQLQueryComplexityLimit, - graphQLQueryDepthLimit); + graphQLQueryDepthLimit, + graphQLQueryIntrospectionEnabled); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/ConnectionMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/ConnectionMapper.java new file mode 100644 index 0000000000000..a4ad332d5946d --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/ConnectionMapper.java @@ -0,0 +1,104 @@ +package com.linkedin.datahub.graphql.resolvers.connection; + +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.DataMap; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataHubConnection; +import com.linkedin.datahub.graphql.generated.DataHubConnectionDetails; +import com.linkedin.datahub.graphql.generated.DataHubJsonConnection; +import com.linkedin.datahub.graphql.generated.DataPlatform; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import 
com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.metadata.Constants; +import io.datahubproject.metadata.services.SecretService; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + +public class ConnectionMapper { + /** + * Maps a GMS encrypted connection details object into the decrypted form returned by the GraphQL + * API. + * + *
<p>
Returns null if the Entity does not have the required aspects: dataHubConnectionDetails or + * dataPlatformInstance. + */ + @Nullable + public static DataHubConnection map( + @Nonnull final QueryContext context, + @Nonnull final EntityResponse entityResponse, + @Nonnull final SecretService secretService) { + // If the connection does not exist, simply return null + if (!hasAspects(entityResponse)) { + return null; + } + + final DataHubConnection result = new DataHubConnection(); + final Urn entityUrn = entityResponse.getUrn(); + final EnvelopedAspectMap aspects = entityResponse.getAspects(); + + result.setUrn(entityUrn.toString()); + result.setType(EntityType.DATAHUB_CONNECTION); + + final EnvelopedAspect envelopedAssertionInfo = + aspects.get(Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME); + if (envelopedAssertionInfo != null) { + result.setDetails( + mapConnectionDetails( + context, + new com.linkedin.connection.DataHubConnectionDetails( + envelopedAssertionInfo.getValue().data()), + secretService)); + } + final EnvelopedAspect envelopedPlatformInstance = + aspects.get(Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME); + if (envelopedPlatformInstance != null) { + final DataMap data = envelopedPlatformInstance.getValue().data(); + result.setPlatform(mapPlatform(new DataPlatformInstance(data))); + } + return result; + } + + private static DataHubConnectionDetails mapConnectionDetails( + @Nonnull final QueryContext context, + @Nonnull final com.linkedin.connection.DataHubConnectionDetails gmsDetails, + @Nonnull final SecretService secretService) { + final DataHubConnectionDetails result = new DataHubConnectionDetails(); + result.setType( + com.linkedin.datahub.graphql.generated.DataHubConnectionDetailsType.valueOf( + gmsDetails.getType().toString())); + if (gmsDetails.hasJson() && ConnectionUtils.canManageConnections(context)) { + result.setJson(mapJsonConnectionDetails(gmsDetails.getJson(), secretService)); + } + if (gmsDetails.hasName()) { + 
result.setName(gmsDetails.getName()); + } + return result; + } + + private static DataHubJsonConnection mapJsonConnectionDetails( + @Nonnull final com.linkedin.connection.DataHubJsonConnection gmsJsonConnection, + @Nonnull final SecretService secretService) { + final DataHubJsonConnection result = new DataHubJsonConnection(); + // Decrypt the BLOB! + result.setBlob(secretService.decrypt(gmsJsonConnection.getEncryptedBlob())); + return result; + } + + private static DataPlatform mapPlatform(final DataPlatformInstance platformInstance) { + // Set dummy platform to be resolved. + final DataPlatform partialPlatform = new DataPlatform(); + partialPlatform.setUrn(platformInstance.getPlatform().toString()); + return partialPlatform; + } + + private static boolean hasAspects(@Nonnull final EntityResponse response) { + return response.hasAspects() + && response.getAspects().containsKey(Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME) + && response.getAspects().containsKey(Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME); + } + + private ConnectionMapper() {} +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/ConnectionUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/ConnectionUtils.java new file mode 100644 index 0000000000000..bcdd6460ae75e --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/ConnectionUtils.java @@ -0,0 +1,23 @@ +package com.linkedin.datahub.graphql.resolvers.connection; + +import com.datahub.authorization.AuthUtil; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.metadata.authorization.PoliciesConfig; +import javax.annotation.Nonnull; + +/** Utilities for working with DataHub Connections. */ +public class ConnectionUtils { + + /** + * Returns true if the user is able to read and or write connection between DataHub and external + * platforms. 
+ */ + public static boolean canManageConnections(@Nonnull QueryContext context) { + return AuthUtil.isAuthorized( + context.getAuthorizer(), + context.getActorUrn(), + PoliciesConfig.MANAGE_CONNECTIONS_PRIVILEGE); + } + + private ConnectionUtils() {} +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/UpsertConnectionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/UpsertConnectionResolver.java new file mode 100644 index 0000000000000..3aae612b8cb78 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/connection/UpsertConnectionResolver.java @@ -0,0 +1,78 @@ +package com.linkedin.datahub.graphql.resolvers.connection; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; + +import com.datahub.authentication.Authentication; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.connection.DataHubConnectionDetailsType; +import com.linkedin.connection.DataHubJsonConnection; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.generated.DataHubConnection; +import com.linkedin.datahub.graphql.generated.UpsertDataHubConnectionInput; +import com.linkedin.entity.EntityResponse; +import com.linkedin.metadata.connection.ConnectionService; +import graphql.schema.DataFetcher; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.services.SecretService; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class UpsertConnectionResolver implements DataFetcher> { + + private final ConnectionService _connectionService; + private final SecretService _secretService; + + public UpsertConnectionResolver( + @Nonnull final ConnectionService 
connectionService, + @Nonnull final SecretService secretService) { + _connectionService = + Objects.requireNonNull(connectionService, "connectionService cannot be null"); + _secretService = Objects.requireNonNull(secretService, "secretService cannot be null"); + } + + @Override + public CompletableFuture get(final DataFetchingEnvironment environment) + throws Exception { + + final QueryContext context = environment.getContext(); + final UpsertDataHubConnectionInput input = + bindArgument(environment.getArgument("input"), UpsertDataHubConnectionInput.class); + final Authentication authentication = context.getAuthentication(); + + return CompletableFuture.supplyAsync( + () -> { + if (!ConnectionUtils.canManageConnections(context)) { + throw new AuthorizationException( + "Unauthorized to upsert Connection. Please contact your DataHub administrator for more information."); + } + + try { + final Urn connectionUrn = + _connectionService.upsertConnection( + context.getOperationContext(), + input.getId(), + UrnUtils.getUrn(input.getPlatformUrn()), + DataHubConnectionDetailsType.valueOf(input.getType().toString()), + input.getJson() != null + // Encrypt payload + ? 
new DataHubJsonConnection() + .setEncryptedBlob(_secretService.encrypt(input.getJson().getBlob())) + : null, + input.getName()); + + final EntityResponse connectionResponse = + _connectionService.getConnectionEntityResponse( + context.getOperationContext(), connectionUrn); + return ConnectionMapper.map(context, connectionResponse, _secretService); + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to upsert a Connection from input %s", input), e); + } + }); + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java index 800a41330346a..3c3fed846e56a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/IngestionResolverUtils.java @@ -65,6 +65,9 @@ public static ExecutionRequest mapExecutionRequest( inputResult.setArguments(StringMapMapper.map(context, executionRequestInput.getArgs())); } inputResult.setRequestedAt(executionRequestInput.getRequestedAt()); + if (executionRequestInput.getActorUrn() != null) { + inputResult.setActorUrn(executionRequestInput.getActorUrn().toString()); + } result.setInput(inputResult); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java index c5410ccaeecf6..39b8c65bdbd51 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolver.java @@ -6,6 +6,7 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.SetMode; import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; @@ -113,6 +114,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) execInput.setExecutorId( ingestionSourceInfo.getConfig().getExecutorId(), SetMode.IGNORE_NULL); execInput.setRequestedAt(System.currentTimeMillis()); + execInput.setActorUrn(UrnUtils.getUrn(context.getActorUrn())); Map arguments = new HashMap<>(); String recipe = ingestionSourceInfo.getConfig().getRecipe(); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java index fd2977f0f49f7..de99044cb22ca 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java @@ -5,6 +5,7 @@ import static com.linkedin.metadata.Constants.*; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.AuthorizationException; @@ -71,6 +72,7 @@ public CompletableFuture get(final DataFetchingEnvironment environment) execInput.setSource(new ExecutionRequestSource().setType(TEST_CONNECTION_SOURCE_NAME)); execInput.setExecutorId(DEFAULT_EXECUTOR_ID); 
execInput.setRequestedAt(System.currentTimeMillis()); + execInput.setActorUrn(UrnUtils.getUrn(context.getActorUrn())); Map arguments = new HashMap<>(); arguments.put( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/connection/DataHubConnectionType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/connection/DataHubConnectionType.java new file mode 100644 index 0000000000000..0a62d224c6513 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/connection/DataHubConnectionType.java @@ -0,0 +1,87 @@ +package com.linkedin.datahub.graphql.types.connection; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataHubConnection; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.resolvers.connection.ConnectionMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import graphql.execution.DataFetcherResult; +import io.datahubproject.metadata.services.SecretService; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; + +public class DataHubConnectionType + implements com.linkedin.datahub.graphql.types.EntityType { + + static final Set ASPECTS_TO_FETCH = + ImmutableSet.of( + Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME, + Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME); + private final EntityClient _entityClient; + private final SecretService _secretService; + + public DataHubConnectionType( + @Nonnull final 
EntityClient entityClient, @Nonnull final SecretService secretService) { + _entityClient = Objects.requireNonNull(entityClient, "entityClient must not be null"); + _secretService = Objects.requireNonNull(secretService, "secretService must not be null"); + } + + @Override + public EntityType type() { + return EntityType.DATAHUB_CONNECTION; + } + + @Override + public Function getKeyProvider() { + return Entity::getUrn; + } + + @Override + public Class objectClass() { + return DataHubConnection.class; + } + + @Override + public List> batchLoad( + @Nonnull List urns, @Nonnull QueryContext context) throws Exception { + final List connectionUrns = + urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + try { + final Map entities = + _entityClient.batchGetV2( + context.getOperationContext(), + Constants.DATAHUB_CONNECTION_ENTITY_NAME, + new HashSet<>(connectionUrns), + ASPECTS_TO_FETCH); + + final List gmsResults = new ArrayList<>(); + for (Urn urn : connectionUrns) { + gmsResults.add(entities.getOrDefault(urn, null)); + } + return gmsResults.stream() + .map( + gmsResult -> + gmsResult == null + ? 
null + : DataFetcherResult.newResult() + .data(ConnectionMapper.map(context, gmsResult, _secretService)) + .build()) + .collect(Collectors.toList()); + } catch (Exception e) { + throw new RuntimeException("Failed to batch load Connections", e); + } + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java index 48750082d3495..ffb14df5e800b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeMapper.java @@ -15,37 +15,39 @@ public class EntityTypeMapper { static final Map ENTITY_TYPE_TO_NAME = ImmutableMap.builder() - .put(EntityType.DATASET, "dataset") - .put(EntityType.ROLE, "role") - .put(EntityType.CORP_USER, "corpuser") - .put(EntityType.CORP_GROUP, "corpGroup") - .put(EntityType.DATA_PLATFORM, "dataPlatform") - .put(EntityType.DASHBOARD, "dashboard") - .put(EntityType.CHART, "chart") - .put(EntityType.TAG, "tag") - .put(EntityType.DATA_FLOW, "dataFlow") - .put(EntityType.DATA_JOB, "dataJob") + .put(EntityType.DATASET, Constants.DATASET_ENTITY_NAME) + .put(EntityType.ROLE, Constants.ROLE_ENTITY_NAME) + .put(EntityType.CORP_USER, Constants.CORP_USER_ENTITY_NAME) + .put(EntityType.CORP_GROUP, Constants.CORP_GROUP_ENTITY_NAME) + .put(EntityType.DATA_PLATFORM, Constants.DATA_PLATFORM_ENTITY_NAME) + .put(EntityType.DASHBOARD, Constants.DASHBOARD_ENTITY_NAME) + .put(EntityType.CHART, Constants.CHART_ENTITY_NAME) + .put(EntityType.TAG, Constants.TAG_ENTITY_NAME) + .put(EntityType.DATA_FLOW, Constants.DATA_FLOW_ENTITY_NAME) + .put(EntityType.DATA_JOB, Constants.DATA_JOB_ENTITY_NAME) .put(EntityType.DATA_PROCESS_INSTANCE, Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME) - .put(EntityType.GLOSSARY_TERM, "glossaryTerm") - .put(EntityType.GLOSSARY_NODE, 
"glossaryNode") - .put(EntityType.MLMODEL, "mlModel") - .put(EntityType.MLMODEL_GROUP, "mlModelGroup") - .put(EntityType.MLFEATURE_TABLE, "mlFeatureTable") - .put(EntityType.MLFEATURE, "mlFeature") - .put(EntityType.MLPRIMARY_KEY, "mlPrimaryKey") - .put(EntityType.CONTAINER, "container") - .put(EntityType.DOMAIN, "domain") - .put(EntityType.NOTEBOOK, "notebook") - .put(EntityType.DATA_PLATFORM_INSTANCE, "dataPlatformInstance") - .put(EntityType.TEST, "test") + .put(EntityType.GLOSSARY_TERM, Constants.GLOSSARY_TERM_ENTITY_NAME) + .put(EntityType.GLOSSARY_NODE, Constants.GLOSSARY_NODE_ENTITY_NAME) + .put(EntityType.MLMODEL, Constants.ML_MODEL_ENTITY_NAME) + .put(EntityType.MLMODEL_GROUP, Constants.ML_MODEL_GROUP_ENTITY_NAME) + .put(EntityType.MLFEATURE_TABLE, Constants.ML_FEATURE_TABLE_ENTITY_NAME) + .put(EntityType.MLFEATURE, Constants.ML_FEATURE_ENTITY_NAME) + .put(EntityType.MLPRIMARY_KEY, Constants.ML_PRIMARY_KEY_ENTITY_NAME) + .put(EntityType.CONTAINER, Constants.CONTAINER_ENTITY_NAME) + .put(EntityType.DOMAIN, Constants.DOMAIN_ENTITY_NAME) + .put(EntityType.NOTEBOOK, Constants.NOTEBOOK_ENTITY_NAME) + .put(EntityType.DATA_PLATFORM_INSTANCE, Constants.DATA_PLATFORM_INSTANCE_ENTITY_NAME) + .put(EntityType.TEST, Constants.TEST_ENTITY_NAME) .put(EntityType.ER_MODEL_RELATIONSHIP, Constants.ER_MODEL_RELATIONSHIP_ENTITY_NAME) .put(EntityType.DATAHUB_VIEW, Constants.DATAHUB_VIEW_ENTITY_NAME) .put(EntityType.DATA_PRODUCT, Constants.DATA_PRODUCT_ENTITY_NAME) - .put(EntityType.SCHEMA_FIELD, "schemaField") + .put(EntityType.SCHEMA_FIELD, Constants.SCHEMA_FIELD_ENTITY_NAME) .put(EntityType.STRUCTURED_PROPERTY, Constants.STRUCTURED_PROPERTY_ENTITY_NAME) .put(EntityType.ASSERTION, Constants.ASSERTION_ENTITY_NAME) .put(EntityType.RESTRICTED, Constants.RESTRICTED_ENTITY_NAME) .put(EntityType.BUSINESS_ATTRIBUTE, Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME) + .put(EntityType.QUERY, Constants.QUERY_ENTITY_NAME) + .put(EntityType.POST, Constants.POST_ENTITY_NAME) .build(); private 
static final Map ENTITY_NAME_TO_TYPE = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java index 6bda333256a4c..7dd12d62765c6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mappers/MapperUtils.java @@ -14,7 +14,7 @@ import com.linkedin.datahub.graphql.generated.SearchSuggestion; import com.linkedin.datahub.graphql.types.common.mappers.UrnToEntityMapper; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.search.SearchEntity; import com.linkedin.metadata.search.utils.SearchUtils; import java.net.URISyntaxException; @@ -89,7 +89,7 @@ public static List getMatchedFieldEntry( if (SearchUtils.isUrn(field.getValue())) { try { Urn urn = Urn.createFromString(field.getValue()); - ValidationUtils.validateUrn( + ValidationApiUtils.validateUrn( context.getOperationContext().getEntityRegistry(), urn); matchedField.setEntity(UrnToEntityMapper.map(context, urn)); } catch (IllegalArgumentException | URISyntaxException e) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java index b3abab5ed3d36..ff54131506a7c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/structuredproperty/StructuredPropertyMapper.java @@ -60,6 +60,7 @@ private void 
mapStructuredPropertyDefinition( definition.setQualifiedName(gmsDefinition.getQualifiedName()); definition.setCardinality( PropertyCardinality.valueOf(gmsDefinition.getCardinality().toString())); + definition.setImmutable(gmsDefinition.isImmutable()); definition.setValueType(createDataTypeEntity(gmsDefinition.getValueType())); if (gmsDefinition.hasDisplayName()) { definition.setDisplayName(gmsDefinition.getDisplayName()); diff --git a/datahub-graphql-core/src/main/resources/connection.graphql b/datahub-graphql-core/src/main/resources/connection.graphql new file mode 100644 index 0000000000000..1a7249485e69d --- /dev/null +++ b/datahub-graphql-core/src/main/resources/connection.graphql @@ -0,0 +1,130 @@ +# DataHub Connections-specific GraphQL types + +extend type Query { + """ + Get a set of connection details by URN. + This requires the 'Manage Connections' platform privilege. + Returns null if a connection with the provided urn does not exist. + """ + connection(urn: String!): DataHubConnection +} + +extend type Mutation { + """ + Upsert a particular connection. + This requires the 'Manage Connections' platform privilege. + """ + upsertConnection(input: UpsertDataHubConnectionInput!): DataHubConnection! +} + +""" +A connection between DataHub and an external Platform. +""" +type DataHubConnection implements Entity { + """ + The urn of the connection + """ + urn: String! + + """ + The standard Entity Type field + """ + type: EntityType! + + """ + The connection details + """ + details: DataHubConnectionDetails! + + """ + The external Data Platform associated with the connection + """ + platform: DataPlatform! + + """ + Not implemented! + """ + relationships(input: RelationshipsInput!): EntityRelationshipsResult +} + + +""" +The details of the Connection +""" +type DataHubConnectionDetails { + """ + The type or format of connection + """ + type: DataHubConnectionDetailsType! + + """ + A JSON-encoded connection. Present when type is JSON. 
+ """ + json: DataHubJsonConnection + + """ + The name for this DataHub connection + """ + name: String +} + +""" +The type of a DataHub connection +""" +enum DataHubConnectionDetailsType { + """ + A json-encoded set of connection details. + """ + JSON +} + +""" +The details of a JSON Connection +""" +type DataHubJsonConnection { + """ + The JSON blob containing the specific connection details. + """ + blob: String! +} + +""" +Input required to upsert a new DataHub connection. +""" +input UpsertDataHubConnectionInput { + """ + An optional ID to use when creating the URN of the connection. If none is provided, + a random UUID will be generated automatically. + """ + id: String + + """ + The type or format of connection + """ + type: DataHubConnectionDetailsType! + + """ + Urn of the associated platform + """ + platformUrn: String! + + """ + A JSON-encoded connection. This must be present when type is JSON. + """ + json: DataHubJsonConnectionInput + + """ + An optional name for this connection entity + """ + name: String +} + +""" +The details of a JSON Connection +""" +input DataHubJsonConnectionInput { + """ + The JSON blob containing the specific connection details. + """ + blob: String! +} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index 296d62bc534a3..2afb42c649fec 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -1143,6 +1143,11 @@ enum EntityType { """ CUSTOM_OWNERSHIP_TYPE + """ + A connection to an external source. + """ + DATAHUB_CONNECTION + """ A DataHub incident - SaaS only """ @@ -2685,6 +2690,11 @@ enum FabricType { Designates corporation fabrics """ CORP + + """ + Designates review fabrics + """ + RVW } """ @@ -7761,10 +7771,33 @@ enum DatasetAssertionScope { } """ -The top-level assertion type. Currently single Dataset assertions are the only type supported. 
+The top-level assertion type. """ enum AssertionType { + """ + A single-dataset assertion. + """ DATASET + """ + An assertion which indicates when a particular operation should occur to an asset. + """ + FRESHNESS + """ + An assertion which indicates how much data should be available for a particular asset. + """ + VOLUME + """ + A raw SQL-statement based assertion. + """ + SQL + """ + A structured assertion targeting a specific column or field of the Dataset. + """ + FIELD + """ + A schema or structural assertion. + """ + DATA_SCHEMA } """ diff --git a/datahub-graphql-core/src/main/resources/ingestion.graphql b/datahub-graphql-core/src/main/resources/ingestion.graphql index d65343c0a16d2..77327ae6d4db1 100644 --- a/datahub-graphql-core/src/main/resources/ingestion.graphql +++ b/datahub-graphql-core/src/main/resources/ingestion.graphql @@ -117,6 +117,11 @@ type ExecutionRequestInput { The time at which the request was created """ requestedAt: Long! + + """ + Urn of the actor who created this execution request + """ + actorUrn: String } """ diff --git a/datahub-graphql-core/src/main/resources/properties.graphql b/datahub-graphql-core/src/main/resources/properties.graphql index 3bf0bbefc406d..120154e930d59 100644 --- a/datahub-graphql-core/src/main/resources/properties.graphql +++ b/datahub-graphql-core/src/main/resources/properties.graphql @@ -75,6 +75,11 @@ type StructuredPropertyDefinition { Entity types that this structured property can be applied to """ entityTypes: [EntityTypeEntity!]! + + """ + Whether or not this structured property is immutable + """ + immutable: Boolean! 
} """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/connection/UpsertConnectionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/connection/UpsertConnectionResolverTest.java new file mode 100644 index 0000000000000..5bc5332e711fd --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/connection/UpsertConnectionResolverTest.java @@ -0,0 +1,128 @@ +package com.linkedin.datahub.graphql.resolvers.connection; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; +import static org.testng.Assert.assertThrows; + +import com.google.common.collect.ImmutableMap; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.connection.DataHubConnectionDetails; +import com.linkedin.connection.DataHubJsonConnection; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataHubConnection; +import com.linkedin.datahub.graphql.generated.DataHubConnectionDetailsType; +import com.linkedin.datahub.graphql.generated.DataHubJsonConnectionInput; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.generated.UpsertDataHubConnectionInput; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.connection.ConnectionService; +import graphql.schema.DataFetchingEnvironment; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.metadata.services.SecretService; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.Assert; +import 
org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class UpsertConnectionResolverTest { + + private ConnectionService connectionService; + private SecretService secretService; + private UpsertConnectionResolver resolver; + + @BeforeMethod + public void setUp() { + connectionService = Mockito.mock(ConnectionService.class); + secretService = Mockito.mock(SecretService.class); + Mockito.when(secretService.encrypt("{}")).thenReturn("encrypted"); + Mockito.when(secretService.decrypt("encrypted")).thenReturn("{}"); + resolver = new UpsertConnectionResolver(connectionService, secretService); + } + + @Test + public void testGetAuthorized() throws Exception { + // Mock inputs + Urn connectionUrn = UrnUtils.getUrn("urn:li:dataHubConnection:test-id"); + Urn platformUrn = UrnUtils.getUrn("urn:li:dataPlatform:slack"); + + final UpsertDataHubConnectionInput input = new UpsertDataHubConnectionInput(); + input.setId(connectionUrn.getId()); + input.setPlatformUrn(platformUrn.toString()); + input.setType(DataHubConnectionDetailsType.JSON); + input.setName("test-name"); + input.setJson(new DataHubJsonConnectionInput("{}")); + + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + final DataHubConnectionDetails details = + new DataHubConnectionDetails() + .setType(com.linkedin.connection.DataHubConnectionDetailsType.JSON) + .setJson(new DataHubJsonConnection().setEncryptedBlob("encrypted")); + + final DataPlatformInstance platformInstance = + new DataPlatformInstance().setPlatform(platformUrn); + + when(connectionService.upsertConnection( + any(OperationContext.class), + Mockito.eq(input.getId()), + Mockito.eq(platformUrn), + Mockito.eq(details.getType()), + Mockito.eq(details.getJson()), + Mockito.any(String.class))) + 
.thenReturn(connectionUrn); + when(connectionService.getConnectionEntityResponse( + any(OperationContext.class), Mockito.eq(connectionUrn))) + .thenReturn( + new EntityResponse() + .setUrn(connectionUrn) + .setEntityName(Constants.DATAHUB_CONNECTION_ENTITY_NAME) + .setAspects( + new EnvelopedAspectMap( + ImmutableMap.of( + Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME, + new EnvelopedAspect() + .setName(Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME) + .setValue(new Aspect(details.data())), + Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME, + new EnvelopedAspect() + .setName(Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME) + .setValue(new Aspect(platformInstance.data())))))); + + DataHubConnection actual = resolver.get(mockEnv).get(); + + Assert.assertEquals(actual.getType(), EntityType.DATAHUB_CONNECTION); + Assert.assertEquals(actual.getUrn(), connectionUrn.toString()); + Assert.assertEquals(actual.getPlatform().getUrn(), platformUrn.toString()); + Assert.assertEquals(actual.getDetails().getType(), input.getType()); + Assert.assertEquals(actual.getDetails().getJson().getBlob(), input.getJson().getBlob()); + } + + @Test + public void testGetUnAuthorized() { + // Mock inputs; the deny context below must make the resolver's authorization check fail + Urn connectionUrn = UrnUtils.getUrn("urn:li:dataHubConnection:test-id"); + + final UpsertDataHubConnectionInput input = new UpsertDataHubConnectionInput(); + input.setId(connectionUrn.getId()); + input.setPlatformUrn(connectionUrn.toString()); + input.setType(DataHubConnectionDetailsType.JSON); + + QueryContext mockContext = getMockDenyContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mappers/MapperUtilsTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mappers/MapperUtilsTest.java index 927d5185a71c7..6d3291736f571 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mappers/MapperUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/mappers/MapperUtilsTest.java @@ -9,7 +9,7 @@ import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.MatchedField; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.snapshot.Snapshot; @@ -42,7 +42,7 @@ public void testMatchedFieldValidation() throws URISyntaxException { "urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts/prog_maintenance%2CPROD%29"); assertThrows( IllegalArgumentException.class, - () -> ValidationUtils.validateUrn(entityRegistry, invalidUrn)); + () -> ValidationApiUtils.validateUrn(entityRegistry, invalidUrn)); QueryContext mockContext = mock(QueryContext.class); when(mockContext.getOperationContext()) diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java index e7311d23a6d2a..4956254062ff9 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java @@ -4,6 +4,7 @@ import com.linkedin.datahub.upgrade.system.vianodes.ReindexDataJobViaNodesCLL; import com.linkedin.metadata.entity.AspectDao; import 
com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Conditional; @@ -15,6 +16,7 @@ public class ReindexDataJobViaNodesCLLConfig { @Bean public NonBlockingSystemUpgrade reindexDataJobViaNodesCLL( + final OperationContext opContext, final EntityService entityService, final AspectDao aspectDao, @Value("${systemUpdate.dataJobNodeCLL.enabled}") final boolean enabled, @@ -22,6 +24,6 @@ public NonBlockingSystemUpgrade reindexDataJobViaNodesCLL( @Value("${systemUpdate.dataJobNodeCLL.delayMs}") final Integer delayMs, @Value("${systemUpdate.dataJobNodeCLL.limit}") final Integer limit) { return new ReindexDataJobViaNodesCLL( - entityService, aspectDao, enabled, batchSize, delayMs, limit); + opContext, entityService, aspectDao, enabled, batchSize, delayMs, limit); } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java index d7a1882656245..77d988f3176f2 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/restoreindices/SendMAEStep.java @@ -49,7 +49,8 @@ public KafkaJob(UpgradeContext context, RestoreIndicesArgs args) { @Override public RestoreIndicesResult call() { return _entityService - .streamRestoreIndices(context.opContext(), args, context.report()::addLine) + .restoreIndices(context.opContext(), args, context.report()::addLine) + .stream() .findFirst() .get(); } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java new file mode 100644 index 0000000000000..27e98259c8beb --- 
/dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/AbstractMCLStep.java @@ -0,0 +1,152 @@ +package com.linkedin.datahub.upgrade.system; + +import static com.linkedin.metadata.Constants.DATA_HUB_UPGRADE_RESULT_ASPECT_NAME; + +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.upgrade.UpgradeContext; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.UpgradeStepResult; +import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.boot.BootstrapStep; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityUtils; +import com.linkedin.metadata.entity.ebean.EbeanAspectV2; +import com.linkedin.metadata.entity.ebean.PartitionedStream; +import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; +import com.linkedin.metadata.utils.AuditStampUtils; +import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Future; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; + +/** + * Generic upgrade step class for generating MCLs for a given aspect in order to update ES documents + */ +@Slf4j +public abstract class AbstractMCLStep implements UpgradeStep { + private final OperationContext opContext; + private final EntityService entityService; + private final AspectDao aspectDao; + + private final int batchSize; + private final int batchDelayMs; + private final int limit; + + public AbstractMCLStep( + OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + this.opContext = 
opContext; + this.entityService = entityService; + this.aspectDao = aspectDao; + this.batchSize = batchSize; + this.batchDelayMs = batchDelayMs; + this.limit = limit; + } + + @Nonnull + protected abstract String getAspectName(); + + protected Urn getUpgradeIdUrn() { + return BootstrapStep.getUpgradeUrn(id()); + } + + /** Optionally apply an urn-like sql filter, otherwise all urns */ + @Nullable + protected abstract String getUrnLike(); + + @Override + public Function executable() { + return (context) -> { + + // re-using for configuring the sql scan + RestoreIndicesArgs args = + new RestoreIndicesArgs().aspectName(getAspectName()).batchSize(batchSize).limit(limit); + + if (getUrnLike() != null) { + args = args.urnLike(getUrnLike()); + } + + try (PartitionedStream stream = aspectDao.streamAspectBatches(args)) { + stream + .partition(args.batchSize) + .forEach( + batch -> { + log.info("Processing batch({}) of size {}.", getAspectName(), batchSize); + + List, Boolean>> futures; + + futures = + EntityUtils.toSystemAspectFromEbeanAspects( + opContext.getRetrieverContext().get(), + batch.collect(Collectors.toList())) + .stream() + .map( + systemAspect -> + entityService.alwaysProduceMCLAsync( + opContext, + systemAspect.getUrn(), + systemAspect.getUrn().getEntityType(), + getAspectName(), + systemAspect.getAspectSpec(), + null, + systemAspect.getRecordTemplate(), + null, + systemAspect + .getSystemMetadata() + .setRunId(id()) + .setLastObserved(System.currentTimeMillis()), + AuditStampUtils.createDefaultAuditStamp(), + ChangeType.UPSERT)) + .collect(Collectors.toList()); + + futures.forEach( + f -> { + try { + f.getFirst().get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + }); + + if (batchDelayMs > 0) { + log.info("Sleeping for {} ms", batchDelayMs); + try { + Thread.sleep(batchDelayMs); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + }); + } + + 
BootstrapStep.setUpgradeResult(opContext, getUpgradeIdUrn(), entityService); + context.report().addLine("State updated: " + getUpgradeIdUrn()); + + return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.SUCCEEDED); + }; + } + + @Override + /** Returns whether the upgrade should be skipped. */ + public boolean skip(UpgradeContext context) { + boolean previouslyRun = + entityService.exists( + opContext, getUpgradeIdUrn(), DATA_HUB_UPGRADE_RESULT_ASPECT_NAME, true); + if (previouslyRun) { + log.info("{} was already run. Skipping.", id()); + } + return previouslyRun; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java index 0695dbe4b1acb..c3c9981b1dd7e 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java @@ -17,6 +17,7 @@ import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ReindexConfig; import com.linkedin.metadata.shared.ElasticSearchIndexed; @@ -28,6 +29,7 @@ import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; +import java.util.stream.Stream; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.opensearch.OpenSearchStatusException; @@ -156,28 +158,34 @@ private boolean blockWrites(String indexName) throws InterruptedException, IOExc private static Set getActiveStructuredPropertiesDefinitions( AspectDao 
aspectDao) { - Set removedStructuredPropertyUrns = - aspectDao - .streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STATUS_ASPECT_NAME) - .map( - entityAspect -> - Pair.of( - entityAspect.getUrn(), - RecordUtils.toRecordTemplate(Status.class, entityAspect.getMetadata()))) - .filter(status -> status.getSecond().isRemoved()) - .map(Pair::getFirst) - .collect(Collectors.toSet()); - - return aspectDao - .streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .map( - entityAspect -> - Pair.of( - entityAspect.getUrn(), - RecordUtils.toRecordTemplate( - StructuredPropertyDefinition.class, entityAspect.getMetadata()))) - .filter(definition -> !removedStructuredPropertyUrns.contains(definition.getKey())) - .map(Pair::getSecond) - .collect(Collectors.toSet()); + Set removedStructuredPropertyUrns; + try (Stream stream = + aspectDao.streamAspects(STRUCTURED_PROPERTY_ENTITY_NAME, STATUS_ASPECT_NAME)) { + removedStructuredPropertyUrns = + stream + .map( + entityAspect -> + Pair.of( + entityAspect.getUrn(), + RecordUtils.toRecordTemplate(Status.class, entityAspect.getMetadata()))) + .filter(status -> status.getSecond().isRemoved()) + .map(Pair::getFirst) + .collect(Collectors.toSet()); + } + + try (Stream stream = + aspectDao.streamAspects( + STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { + return stream + .map( + entityAspect -> + Pair.of( + entityAspect.getUrn(), + RecordUtils.toRecordTemplate( + StructuredPropertyDefinition.class, entityAspect.getMetadata()))) + .filter(definition -> !removedStructuredPropertyUrns.contains(definition.getKey())) + .map(Pair::getSecond) + .collect(Collectors.toSet()); + } } } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java index 9ad673c599758..fc0b44f57ab49 100644 --- 
a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java @@ -5,7 +5,9 @@ import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; import java.util.List; +import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; /** @@ -18,6 +20,7 @@ public class ReindexDataJobViaNodesCLL implements NonBlockingSystemUpgrade { private final List _steps; public ReindexDataJobViaNodesCLL( + @Nonnull OperationContext opContext, EntityService entityService, AspectDao aspectDao, boolean enabled, @@ -28,7 +31,7 @@ public ReindexDataJobViaNodesCLL( _steps = ImmutableList.of( new ReindexDataJobViaNodesCLLStep( - entityService, aspectDao, batchSize, batchDelayMs, limit)); + opContext, entityService, aspectDao, batchSize, batchDelayMs, limit)); } else { _steps = ImmutableList.of(); } diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java index 5e135124524d9..cf580670ee3a9 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java @@ -2,148 +2,43 @@ import static com.linkedin.metadata.Constants.*; -import com.linkedin.common.urn.Urn; import com.linkedin.datahub.upgrade.UpgradeContext; -import com.linkedin.datahub.upgrade.UpgradeStep; -import com.linkedin.datahub.upgrade.UpgradeStepResult; -import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult; -import com.linkedin.events.metadata.ChangeType; 
-import com.linkedin.metadata.boot.BootstrapStep; +import com.linkedin.datahub.upgrade.system.AbstractMCLStep; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.EntityUtils; -import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.utils.AuditStampUtils; -import com.linkedin.util.Pair; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.function.Function; -import java.util.stream.Collectors; +import io.datahubproject.metadata.context.OperationContext; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; @Slf4j -public class ReindexDataJobViaNodesCLLStep implements UpgradeStep { - - public static final String UPGRADE_ID = "via-node-cll-reindex-datajob-v3"; - private static final Urn UPGRADE_ID_URN = BootstrapStep.getUpgradeUrn(UPGRADE_ID); - - private final EntityService entityService; - private final AspectDao aspectDao; - private final int batchSize; - private final int batchDelayMs; - private final int limit; +public class ReindexDataJobViaNodesCLLStep extends AbstractMCLStep { public ReindexDataJobViaNodesCLLStep( + OperationContext opContext, EntityService entityService, AspectDao aspectDao, Integer batchSize, Integer batchDelayMs, Integer limit) { - this.entityService = entityService; - this.aspectDao = aspectDao; - this.batchSize = batchSize != null ?
batchSize : 200; - this.batchDelayMs = batchDelayMs; - this.limit = limit; + super(opContext, entityService, aspectDao, batchSize, batchDelayMs, limit); } @Override - public Function executable() { - return (context) -> { - - // re-using for configuring the sql scan - RestoreIndicesArgs args = - new RestoreIndicesArgs() - .aspectName(DATA_JOB_INPUT_OUTPUT_ASPECT_NAME) - .urnLike("urn:li:" + DATA_JOB_ENTITY_NAME + ":%") - .batchSize(batchSize) - .limit(limit); - - final AspectSpec aspectSpec = - context - .opContext() - .getEntityRegistry() - .getAspectSpecs() - .get(DATA_JOB_INPUT_OUTPUT_ASPECT_NAME); - - aspectDao - .streamAspectBatches(args) - .forEach( - batch -> { - log.info("Processing batch of size {}.", batchSize); - - List, Boolean>> futures = - EntityUtils.toSystemAspectFromEbeanAspects( - context.opContext().getRetrieverContext().get(), - batch.collect(Collectors.toList())) - .stream() - .map( - systemAspect -> - entityService.alwaysProduceMCLAsync( - context.opContext(), - systemAspect.getUrn(), - systemAspect.getUrn().getEntityType(), - DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, - aspectSpec, - null, - systemAspect.getRecordTemplate(), - null, - systemAspect - .getSystemMetadata() - .setRunId(UPGRADE_ID) - .setLastObserved(System.currentTimeMillis()), - AuditStampUtils.createDefaultAuditStamp(), - ChangeType.UPSERT)) - .collect(Collectors.toList()); - - futures.forEach( - f -> { - try { - f.getFirst().get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } - }); - - if (batchDelayMs > 0) { - log.info("Sleeping for {} ms", batchDelayMs); - try { - Thread.sleep(batchDelayMs); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } - }); - - entityService - .streamRestoreIndices( - context.opContext(), args, x -> context.report().addLine((String) x)) - .forEach( - result -> { - context.report().addLine("Rows migrated: " + result.rowsMigrated); - context.report().addLine("Rows ignored: " + 
result.ignored); - }); - - BootstrapStep.setUpgradeResult(context.opContext(), UPGRADE_ID_URN, entityService); - context.report().addLine("State updated: " + UPGRADE_ID_URN); - - return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.SUCCEEDED); - }; + public String id() { + return "via-node-cll-reindex-datajob-v3"; } + @Nonnull @Override - public String id() { - return UPGRADE_ID; + protected String getAspectName() { + return DATA_JOB_INPUT_OUTPUT_ASPECT_NAME; } - /** - * Returns whether the upgrade should proceed if the step fails after exceeding the maximum - * retries. - */ + @Nullable @Override - public boolean isOptional() { - return false; + protected String getUrnLike() { + return "urn:li:" + DATA_JOB_ENTITY_NAME + ":%"; } @Override @@ -152,17 +47,11 @@ public boolean isOptional() { * variable SKIP_REINDEX_DATA_JOB_INPUT_OUTPUT to determine whether to skip. */ public boolean skip(UpgradeContext context) { - boolean previouslyRun = - entityService.exists( - context.opContext(), UPGRADE_ID_URN, DATA_HUB_UPGRADE_RESULT_ASPECT_NAME, true); boolean envFlagRecommendsSkip = Boolean.parseBoolean(System.getenv("SKIP_REINDEX_DATA_JOB_INPUT_OUTPUT")); - if (previouslyRun) { - log.info("{} was already run. Skipping.", id()); - } if (envFlagRecommendsSkip) { log.info("Environment variable SKIP_REINDEX_DATA_JOB_INPUT_OUTPUT is set to true. 
Skipping."); } - return (previouslyRun || envFlagRecommendsSkip); + return (super.skip(context) || envFlagRecommendsSkip); } } diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java index c28ff7fd29dfb..154b1de71f46c 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java @@ -19,6 +19,7 @@ import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.mxe.Topics; +import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.util.List; import javax.inject.Named; @@ -58,6 +59,8 @@ public class DatahubUpgradeNonBlockingTest extends AbstractTestNGSpringContextTe @Autowired private EntityServiceImpl entityService; + @Autowired private OperationContext opContext; + @Test public void testSystemUpdateNonBlockingInit() { assertNotNull(systemUpdateNonBlocking); @@ -76,7 +79,7 @@ public void testReindexDataJobViaNodesCLLPaging() { AspectDao mockAspectDao = mock(AspectDao.class); ReindexDataJobViaNodesCLL cllUpgrade = - new ReindexDataJobViaNodesCLL(mockService, mockAspectDao, true, 10, 0, 0); + new ReindexDataJobViaNodesCLL(opContext, mockService, mockAspectDao, true, 10, 0, 0); SystemUpdateNonBlocking upgrade = new SystemUpdateNonBlocking(List.of(), List.of(cllUpgrade), null); DefaultUpgradeManager manager = new DefaultUpgradeManager(); diff --git a/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx b/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx index 9829cac9befe4..88b3c25162ec5 100644 --- a/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx +++ 
b/datahub-web-react/src/app/entity/shared/entityForm/Form.tsx @@ -15,6 +15,7 @@ import FormRequestedBy from './FormSelectionModal/FormRequestedBy'; import useHasComponentRendered from '../../../shared/useHasComponentRendered'; import Loading from '../../../shared/Loading'; import { DeferredRenderComponent } from '../../../shared/DeferredRenderComponent'; +import { Editor } from '../tabs/Documentation/components/editor/Editor'; const TabWrapper = styled.div` background-color: ${ANTD_GRAY_V2[1]}; @@ -70,7 +71,9 @@ function Form({ formUrn }: Props) { )} {description ? ( - {description} + + + ) : ( Please fill out the following information for this {entityRegistry.getEntityName(entityType)} so diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/Editor.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/Editor.tsx index 5a02067deb33d..fe2a8c51f9377 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/Editor.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/Editor.tsx @@ -42,10 +42,11 @@ type EditorProps = { doNotFocus?: boolean; dataTestId?: string; onKeyDown?: (event: React.KeyboardEvent) => void; + editorStyle?: string; }; export const Editor = forwardRef((props: EditorProps, ref) => { - const { content, readOnly, onChange, className, dataTestId, onKeyDown } = props; + const { content, readOnly, onChange, className, dataTestId, onKeyDown, editorStyle } = props; const { manager, state, getContext } = useRemirror({ extensions: () => [ new BlockquoteExtension(), @@ -100,7 +101,7 @@ export const Editor = forwardRef((props: EditorProps, ref) => { }, [readOnly, content]); return ( - + {!readOnly && ( diff --git a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/EditorTheme.tsx b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/EditorTheme.tsx index c37e50d382435..ec094fb84e59a 
100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/EditorTheme.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Documentation/components/editor/EditorTheme.tsx @@ -47,7 +47,7 @@ export const EditorTheme: RemirrorThemeType = { }, }; -export const EditorContainer = styled.div` +export const EditorContainer = styled.div<{ editorStyle?: string }>` ${extensionBlockquoteStyledCss} ${extensionCalloutStyledCss} ${extensionCodeBlockStyledCss} @@ -81,6 +81,7 @@ export const EditorContainer = styled.div` line-height: 1.5; white-space: pre-wrap; margin: 0; + ${props => props.editorStyle} a { font-weight: 500; diff --git a/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx b/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx index 7ff08e3813863..ac50df6a5381e 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Properties/Edit/EditColumn.tsx @@ -10,7 +10,7 @@ interface Props { export function EditColumn({ propertyRow }: Props) { const [isEditModalVisible, setIsEditModalVisible] = useState(false); - if (!propertyRow.structuredProperty) { + if (!propertyRow.structuredProperty || propertyRow.structuredProperty?.definition.immutable) { return null; } diff --git a/datahub-web-react/src/graphql/connection.graphql b/datahub-web-react/src/graphql/connection.graphql new file mode 100644 index 0000000000000..02f87f08c519f --- /dev/null +++ b/datahub-web-react/src/graphql/connection.graphql @@ -0,0 +1,29 @@ +mutation upsertConnection($input: UpsertDataHubConnectionInput!) { + upsertConnection(input: $input) { + urn + details { + type + json { + blob + } + } + platform { + ...platformFields + } + } +} + +query connection($urn: String!) 
{ + connection(urn: $urn) { + urn + details { + type + json { + blob + } + } + platform { + ...platformFields + } + } +} diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 7028ac8c4f4d0..b28150a47b753 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -1245,6 +1245,7 @@ fragment structuredPropertyFields on StructuredPropertyEntity { qualifiedName description cardinality + immutable valueType { info { type diff --git a/datahub-web-react/src/graphql/ingestion.graphql b/datahub-web-react/src/graphql/ingestion.graphql index 4d6f090b99356..c172ccdbe7632 100644 --- a/datahub-web-react/src/graphql/ingestion.graphql +++ b/datahub-web-react/src/graphql/ingestion.graphql @@ -33,6 +33,7 @@ query listIngestionSources($input: ListIngestionSourcesInput!) { id input { requestedAt + actorUrn } result { status @@ -76,6 +77,7 @@ query getIngestionSource($urn: String!, $runStart: Int, $runCount: Int) { id input { requestedAt + actorUrn source { type } @@ -98,6 +100,7 @@ query getIngestionExecutionRequest($urn: String!) 
{ source { type } + actorUrn arguments { key value diff --git a/docker/datahub-gms/Dockerfile b/docker/datahub-gms/Dockerfile index c62ab3c9e3bfa..a9399a24f3fbf 100644 --- a/docker/datahub-gms/Dockerfile +++ b/docker/datahub-gms/Dockerfile @@ -57,6 +57,7 @@ COPY war.war /datahub/datahub-gms/bin/war.war COPY metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml COPY docker/datahub-gms/start.sh /datahub/datahub-gms/scripts/start.sh COPY docker/datahub-gms/jetty.xml /datahub/datahub-gms/scripts/jetty.xml +COPY docker/datahub-gms/jetty-jmx.xml /datahub/datahub-gms/scripts/jetty-jmx.xml COPY docker/monitoring/client-prometheus-config.yaml /datahub/datahub-gms/scripts/prometheus-config.yaml RUN chmod +x /datahub/datahub-gms/scripts/start.sh diff --git a/docker/datahub-gms/jetty-jmx.xml b/docker/datahub-gms/jetty-jmx.xml new file mode 100644 index 0000000000000..5aadbb66a70ed --- /dev/null +++ b/docker/datahub-gms/jetty-jmx.xml @@ -0,0 +1,31 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docker/datahub-gms/start.sh b/docker/datahub-gms/start.sh index ef1ab2d71a0b7..c91580eed83cb 100755 --- a/docker/datahub-gms/start.sh +++ b/docker/datahub-gms/start.sh @@ -63,9 +63,11 @@ COMMON=" $OTEL_AGENT \ $PROMETHEUS_AGENT \ -jar /jetty-runner.jar \ + --stats unsecure \ --jar jetty-util.jar \ --jar jetty-jmx.jar \ --config /datahub/datahub-gms/scripts/jetty.xml \ + --config /datahub/datahub-gms/scripts/jetty-jmx.xml \ /datahub/datahub-gms/bin/war.war" if [[ $SKIP_ELASTICSEARCH_CHECK != true ]]; then diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index b6ac43a9eda43..7974b66ec87db 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -51,6 +51,7 @@ services: volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml + - 
./datahub-gms/jetty-jmx.xml:/datahub/datahub-gms/scripts/jetty-jmx.xml - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml - ../metadata-models/src/main/resources/:/datahub/datahub-gms/resources - ../metadata-service/war/build/libs/:/datahub/datahub-gms/bin diff --git a/docker/profiles/docker-compose.actions.yml b/docker/profiles/docker-compose.actions.yml index 64ad5d9211ed8..c0a0fd5932871 100644 --- a/docker/profiles/docker-compose.actions.yml +++ b/docker/profiles/docker-compose.actions.yml @@ -5,7 +5,7 @@ x-datahub-actions-service: &datahub-actions-service env_file: - datahub-actions/env/docker.env - ${DATAHUB_LOCAL_COMMON_ENV:-empty.env} - - ${DATAHUB_LOCAL_ACTIONS_ENV:-empty.env} + - ${DATAHUB_LOCAL_ACTIONS_ENV:-empty2.env} environment: ACTIONS_EXTRA_PACKAGES: ${ACTIONS_EXTRA_PACKAGES:-} ACTIONS_CONFIG: ${ACTIONS_CONFIG:-} diff --git a/docker/profiles/docker-compose.frontend.yml b/docker/profiles/docker-compose.frontend.yml index a0239402cad86..b43db8297cb1e 100644 --- a/docker/profiles/docker-compose.frontend.yml +++ b/docker/profiles/docker-compose.frontend.yml @@ -7,7 +7,7 @@ x-datahub-frontend-service: &datahub-frontend-service env_file: - datahub-frontend/env/docker.env - ${DATAHUB_LOCAL_COMMON_ENV:-empty.env} - - ${DATAHUB_LOCAL_FRONTEND_ENV:-empty.env} + - ${DATAHUB_LOCAL_FRONTEND_ENV:-empty2.env} environment: &datahub-frontend-service-env KAFKA_BOOTSTRAP_SERVER: broker:29092 volumes: diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 7950c12cfbb31..76bdcacd2dfc9 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -61,7 +61,7 @@ x-datahub-system-update-service: &datahub-system-update-service env_file: - datahub-upgrade/env/docker.env - ${DATAHUB_LOCAL_COMMON_ENV:-empty.env} - - ${DATAHUB_LOCAL_SYS_UPDATE_ENV:-empty.env} + - ${DATAHUB_LOCAL_SYS_UPDATE_ENV:-empty2.env} environment: &datahub-system-update-env <<: 
[*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *kafka-env] SCHEMA_REGISTRY_SYSTEM_UPDATE: ${SCHEMA_REGISTRY_SYSTEM_UPDATE:-true} @@ -96,9 +96,11 @@ x-datahub-gms-service: &datahub-gms-service env_file: - datahub-gms/env/docker.env - ${DATAHUB_LOCAL_COMMON_ENV:-empty.env} - - ${DATAHUB_LOCAL_GMS_ENV:-empty.env} + - ${DATAHUB_LOCAL_GMS_ENV:-empty2.env} environment: &datahub-gms-env <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] + ELASTICSEARCH_QUERY_CUSTOM_CONFIG_ENABLED: 'true' + ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: '/etc/datahub/search/search_config.yaml' healthcheck: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s @@ -107,6 +109,7 @@ x-datahub-gms-service: &datahub-gms-service timeout: 5s volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins + - ${HOME}/.datahub/search:/etc/datahub/search labels: io.datahubproject.datahub.component: "gms" @@ -127,10 +130,12 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml + - ./datahub-gms/jetty-jmx.xml:/datahub/datahub-gms/scripts/jetty-jmx.xml - ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml - ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources - ../../metadata-service/war/build/libs/:/datahub/datahub-gms/bin - ${HOME}/.datahub/plugins:/etc/datahub/plugins + - ${HOME}/.datahub/search:/etc/datahub/search ################################# # MAE Consumer @@ -143,7 +148,7 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service env_file: - datahub-mae-consumer/env/docker.env - ${DATAHUB_LOCAL_COMMON_ENV:-empty.env} - - ${DATAHUB_LOCAL_MAE_ENV:-empty.env} + - ${DATAHUB_LOCAL_MAE_ENV:-empty2.env} environment: &datahub-mae-consumer-env <<:
[*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *kafka-env] @@ -169,7 +174,7 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service env_file: - datahub-mce-consumer/env/docker.env - ${DATAHUB_LOCAL_COMMON_ENV:-empty.env} - - ${DATAHUB_LOCAL_MCE_ENV:-empty.env} + - ${DATAHUB_LOCAL_MCE_ENV:-empty2.env} environment: &datahub-mce-consumer-env <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] @@ -419,4 +424,4 @@ services: - debug-consumers depends_on: datahub-gms-debug-consumers: - condition: service_healthy \ No newline at end of file + condition: service_healthy diff --git a/docker/profiles/docker-compose.yml b/docker/profiles/docker-compose.yml index 534ca9702e2d7..4d12061196fbf 100644 --- a/docker/profiles/docker-compose.yml +++ b/docker/profiles/docker-compose.yml @@ -1,5 +1,4 @@ --- -version: '3.9' name: datahub include: diff --git a/docker/profiles/empty.env b/docker/profiles/empty.env index 6a3aaf83f8378..f07970153f455 100644 --- a/docker/profiles/empty.env +++ b/docker/profiles/empty.env @@ -1,4 +1,5 @@ -# Docker compose requires that all env_file entries exist. +# Docker compose requires that all env_file entries exist and +# are unique. # Because we have some optional env_file entries that can be set # as environment variables, we need a default file to point at # when those are not set. diff --git a/docker/profiles/empty2.env b/docker/profiles/empty2.env new file mode 100644 index 0000000000000..acd68426f0985 --- /dev/null +++ b/docker/profiles/empty2.env @@ -0,0 +1 @@ +# See empty.env. 
diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 3b18f4e9a5abe..892f2b2d14576 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -110,7 +110,7 @@ module.exports = { label: "Demo", }, { - href: "https://www.acryldata.io/blog", + href: "https://blog.datahubproject.io/", label: "Blog", }, { diff --git a/docs-website/filterTagIndexes.json b/docs-website/filterTagIndexes.json index 64cb734e6d984..0c1f541cf53d3 100644 --- a/docs-website/filterTagIndexes.json +++ b/docs-website/filterTagIndexes.json @@ -77,6 +77,17 @@ "Features": "" } }, + { + "Path": "docs/lineage/dagster", + "imgPath": "img/logos/platforms/dagster.svg", + "Title": "Dagster", + "Description": "Dagster is a next-generation open source orchestration platform for the development, production, and observation of data assets.", + "tags": { + "Platform Type": "Orchestrator", + "Connection Type": "Pull", + "Features": "Stateful Ingestion, UI Ingestion, Status Aspect" + } + }, { "Path": "docs/generated/ingestion/sources/databricks", "imgPath": "img/logos/platforms/databricks.png", @@ -433,7 +444,7 @@ "Path": "docs/generated/ingestion/sources/hive-metastore", "imgPath": "img/logos/platforms/presto.svg", "Title": "Hive Metastore", - "Description": "Presto on Hive is a data tool that allows users to query and analyze large datasets stored in Hive using SQL-like syntax.", + "Description": "Hive Metastore (HMS) is a service that stores metadata that is related to Hive, Presto, Trino and other services in a backend Relational Database Management System (RDBMS).", "tags": { "Platform Type": "Datastore", "Connection Type": "Pull", @@ -551,7 +562,7 @@ } }, { - "Path": "docs/metadata-integration/java/spark-lineage", + "Path": "docs/metadata-integration/java/spark-lineage-beta", "imgPath": "img/logos/platforms/spark.svg", "Title": "Spark", "Description": "Spark is a data processing tool that enables fast and efficient processing of large-scale
data sets using distributed computing.", diff --git a/docs-website/graphql/generateGraphQLSchema.sh b/docs-website/graphql/generateGraphQLSchema.sh index c6d7ec528b613..da14fbc337f90 100755 --- a/docs-website/graphql/generateGraphQLSchema.sh +++ b/docs-website/graphql/generateGraphQLSchema.sh @@ -17,4 +17,5 @@ cat ../../datahub-graphql-core/src/main/resources/timeline.graphql >> combined.g cat ../../datahub-graphql-core/src/main/resources/step.graphql >> combined.graphql cat ../../datahub-graphql-core/src/main/resources/lineage.graphql >> combined.graphql cat ../../datahub-graphql-core/src/main/resources/properties.graphql >> combined.graphql -cat ../../datahub-graphql-core/src/main/resources/forms.graphql >> combined.graphql \ No newline at end of file +cat ../../datahub-graphql-core/src/main/resources/forms.graphql >> combined.graphql +cat ../../datahub-graphql-core/src/main/resources/connection.graphql >> combined.graphql \ No newline at end of file diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 865b37c961a71..5c71e79a10172 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -43,70 +43,144 @@ module.exports = { description: "Learn about the features of DataHub.", }, items: [ - "docs/ui-ingestion", - "docs/how/search", - "docs/schema-history", // "docs/how/ui-tabs-guide", - "docs/domains", - "docs/dataproducts", - "docs/glossary/business-glossary", - "docs/tags", - "docs/ownership/ownership-types", - "docs/authorization/access-policies-guide", - "docs/features/dataset-usage-and-query-history", - "docs/posts", - "docs/sync-status", - "docs/incidents/incidents", - "docs/generated/lineage/lineage-feature-guide", - "docs/businessattributes", { + label: "Assertions", + type: "category", + link: { type: "doc", id: "docs/managed-datahub/observe/assertions" }, + items: [ + { + label: "Column Assertions", + type: "doc", + id: "docs/managed-datahub/observe/column-assertions", + className: "saasOnly", + }, + { + label: "Custom SQL 
Assertions", + type: "doc", + id: "docs/managed-datahub/observe/custom-sql-assertions", + className: "saasOnly", + }, + { + label: "Freshness Assertions", + type: "doc", + id: "docs/managed-datahub/observe/freshness-assertions", + className: "saasOnly", + }, + { + label: "Schema Assertions", + type: "doc", + id: "docs/managed-datahub/observe/schema-assertions", + className: "saasOnly", + }, + { + label: "Volume Assertions", + type: "doc", + id: "docs/managed-datahub/observe/volume-assertions", + className: "saasOnly", + }, + ], + }, + { + label: "Business Attributes", type: "doc", - id: "docs/tests/metadata-tests", - className: "saasOnly", + id: "docs/businessattributes", + }, + { + label: "Business Glossary", + type: "doc", + id: "docs/glossary/business-glossary", + }, + { + label: "Data Contract", + type: "doc", + id: "docs/managed-datahub/observe/data-contract", + }, + { + label: "Data Products", + type: "doc", + id: "docs/dataproducts", + }, + { + label: "Dataset Usage and Query History", + type: "doc", + id: "docs/features/dataset-usage-and-query-history", + }, + { + label: "Domains", + type: "doc", + id: "docs/domains", + }, + { + label: "Incidents", + type: "doc", + id: "docs/incidents/incidents", + }, + { + label: "Ingestion", + type: "doc", + id: "docs/ui-ingestion", }, - "docs/act-on-metadata/impact-analysis", { - label: "Observability", + label: "Lineage", type: "category", + link: { + type: "doc", + id: "docs/generated/lineage/lineage-feature-guide", + }, items: [ { - label: "Assertions", - type: "category", - link: { - type: "doc", - id: "docs/managed-datahub/observe/assertions", - }, - items: [ - { - type: "doc", - id: "docs/managed-datahub/observe/freshness-assertions", - className: "saasOnly", - }, - { - type: "doc", - id: "docs/managed-datahub/observe/volume-assertions", - className: "saasOnly", - }, - { - type: "doc", - id: "docs/managed-datahub/observe/custom-sql-assertions", - className: "saasOnly", - }, - { - type: "doc", - id: 
"docs/managed-datahub/observe/column-assertions", - className: "saasOnly", - }, - ], + label: "Lineage Impact analysis", + type: "doc", + id: "docs/act-on-metadata/impact-analysis", }, { + label: "Managing Lineage via UI", type: "doc", - id: "docs/managed-datahub/observe/data-contract", + id: "docs/features/feature-guides/ui-lineage", }, ], }, { - Guides: ["docs/features/feature-guides/ui-lineage"], + label: "Metadata Tests", + type: "doc", + id: "docs/tests/metadata-tests", + className: "saasOnly", + }, + { + label: "Ownership", + type: "doc", + id: "docs/ownership/ownership-types", + }, + { + label: "Policies", + type: "doc", + id: "docs/authorization/access-policies-guide", + }, + { + label: "Posts", + type: "doc", + id: "docs/posts", + }, + { + label: "Schema history", + type: "doc", + id: "docs/schema-history", + }, + { + label: "Search", + type: "doc", + id: "docs/how/search", + }, + { + label: "Sync Status", + type: "doc", + id: "docs/sync-status", + }, + { + label: "Tags", + type: "doc", + id: "docs/tags", }, ], }, @@ -188,6 +262,7 @@ module.exports = { }, { "Managed DataHub Release History": [ + "docs/managed-datahub/release-notes/v_0_3_2", "docs/managed-datahub/release-notes/v_0_3_1", "docs/managed-datahub/release-notes/v_0_2_16", "docs/managed-datahub/release-notes/v_0_2_15", @@ -317,6 +392,11 @@ module.exports = { id: "docs/lineage/dagster", label: "Dagster", }, + { + type: "doc", + id: "docs/lineage/openlineage", + label: "OpenLineage", + }, { type: "doc", id: "metadata-integration/java/spark-lineage/README", @@ -809,6 +889,7 @@ module.exports = { // "metadata-jobs/README", // "docs/how/add-user-data", // "docs/_feature-guide-template" + // "docs/_api-guide-template" // - "metadata-service/services/README" // "metadata-ingestion/examples/structured_properties/README" // ], diff --git a/docs-website/static/img/logos/platforms/dagster.svg b/docs-website/static/img/logos/platforms/dagster.svg new file mode 100644 index 0000000000000..d2ae628553a7d --- 
/dev/null +++ b/docs-website/static/img/logos/platforms/dagster.svg @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/docs/_api-guide-template.md b/docs/_api-guide-template.md new file mode 100644 index 0000000000000..0be6e14a72647 --- /dev/null +++ b/docs/_api-guide-template.md @@ -0,0 +1,72 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# [Feature Name] + + + + +### Goal of This Guide + +This guide will show you how to... + + + +## Prerequisites + +For this tutorial, you need to deploy DataHub Quickstart and ingest sample data. For detailed steps, please refer to [Datahub Quickstart Guide] + + + +## [Action] [Feature Name] + + + + + + + + + + + + + + + + + + + + + + + + + +### Expected Outcome of [Action] [Feature Name] + + + diff --git a/docs/_feature-guide-template.md b/docs/_feature-guide-template.md index 63ba258d52d0b..9c1aead5e13ab 100644 --- a/docs/_feature-guide-template.md +++ b/docs/_feature-guide-template.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub [Feature Name] +# [Feature Name] diff --git a/docs/act-on-metadata/impact-analysis.md b/docs/act-on-metadata/impact-analysis.md index ae593d09c255f..3dbf532b2dd84 100644 --- a/docs/act-on-metadata/impact-analysis.md +++ b/docs/act-on-metadata/impact-analysis.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Lineage Impact Analysis +# Lineage Impact Analysis diff --git a/docs/api/datahub-apis.md b/docs/api/datahub-apis.md index 252c96cab56c3..ed48eb0f52fa2 100644 --- a/docs/api/datahub-apis.md +++ b/docs/api/datahub-apis.md @@ -66,7 +66,7 @@ Here's an overview of what each API can do. 
| Create a Dataset | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md) | ✅ | | Delete a Dataset (Soft Delete) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | | Delete a Dataset (Hard Delete) | 🚫 | ✅ [[Guide]](/docs/api/tutorials/datasets.md#delete-dataset) | ✅ | -| Search a Dataset | ✅ | ✅ | ✅ | +| Search a Dataset | ✅ [[Guide]](/docs/how/search.md#graphql) | ✅ | ✅ | | Read a Dataset Deprecation | ✅ | ✅ | ✅ | | Read Dataset Entities (V2) | ✅ | ✅ | ✅ | | Create a Tag | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ [[Guide]](/docs/api/tutorials/tags.md#create-tags) | ✅ | @@ -116,4 +116,4 @@ Here's an overview of what each API can do. | Create Dataset Lineage with MCPW & Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_mcpw_rest.py) | ✅ | | Create Dataset Lineage with Rest Emitter | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_rest.py) | ✅ | | Create DataJob with Dataflow | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow.py) [[Simple]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_simple.py) [[Verbose]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_job_dataflow_new_api_verbose.py) | ✅ | -| Create Programmatic Pipeline | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/programatic_pipeline.py) | ✅ | \ No newline at end of file +| Create Programmatic Pipeline | 🚫 | ✅ [[Code]](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/programatic_pipeline.py) | ✅ | diff --git a/docs/api/tutorials/lineage.md 
b/docs/api/tutorials/lineage.md index cf83538c41ac3..c30307098d613 100644 --- a/docs/api/tutorials/lineage.md +++ b/docs/api/tutorials/lineage.md @@ -137,7 +137,7 @@ You can now see the column-level lineage between datasets. Note that you have to

-## Read Lineage +## Read Table Lineage @@ -199,3 +199,60 @@ curl --location --request POST 'http://localhost:8080/api/graphql' \ This will perform a multi-hop lineage search on the urn specified. For more information about the `searchAcrossLineage` mutation, please refer to [searchAcrossLineage](https://datahubproject.io/docs/graphql/queries/#searchacrosslineage). + +## Read Column Lineage + + + + +```graphql +query searchAcrossLineage { + searchAcrossLineage( + input: { + query: "*" + urn: "urn:li:schemaField(urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.adoption.human_profiles,PROD),profile_id)" + start: 0 + count: 10 + direction: DOWNSTREAM + orFilters: [ + { + and: [ + { + condition: EQUAL + negated: false + field: "degree" + values: ["1", "2", "3+"] + } + ] + } + ] + } + ) { + searchResults { + degree + entity { + urn + type + } + } + } +} +``` + +This example shows using lineage degrees as a filter, but additional search filters can be included here as well. + + + + +```shell +curl --location --request POST 'http://localhost:8080/api/graphql' \ +--header 'Authorization: Bearer ' \ +--header 'Content-Type: application/json' --data-raw '{ { "query": "query searchAcrossLineage { searchAcrossLineage( input: { query: \"*\" urn: \"urn:li:schemaField(urn:li:dataset:(urn:li:dataPlatform:dbt,long_tail_companions.adoption.human_profiles,PROD),profile_id)\" start: 0 count: 10 direction: DOWNSTREAM orFilters: [ { and: [ { condition: EQUAL negated: false field: \"degree\" values: [\"1\", \"2\", \"3+\"] } ] } ] } ) { searchResults { degree entity { urn type } } }}" +}}' +``` + + + + +This will perform a multi-hop lineage search on the urn specified. You can see schemaField URNs are made up of two parts: first the table they are a column of, and second the path of the column. For more information about the `searchAcrossLineage` mutation, please refer to [searchAcrossLineage](https://datahubproject.io/docs/graphql/queries/#searchacrosslineage). 
+ diff --git a/docs/authentication/personal-access-tokens.md b/docs/authentication/personal-access-tokens.md index ad81caef66f8f..8488163d85d15 100644 --- a/docs/authentication/personal-access-tokens.md +++ b/docs/authentication/personal-access-tokens.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Personal Access Tokens +# Personal Access Tokens diff --git a/docs/authorization/access-policies-guide.md b/docs/authorization/access-policies-guide.md index b8c23b0cd79b0..a9a54a762cd81 100644 --- a/docs/authorization/access-policies-guide.md +++ b/docs/authorization/access-policies-guide.md @@ -1,4 +1,4 @@ -# About DataHub Access Policies +# Access Policies diff --git a/docs/authorization/roles.md b/docs/authorization/roles.md index b25579072980d..7e2f1797309df 100644 --- a/docs/authorization/roles.md +++ b/docs/authorization/roles.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Roles +# Roles diff --git a/docs/developers.md b/docs/developers.md index 980aa3e3acf87..0c9d7bee3d79f 100644 --- a/docs/developers.md +++ b/docs/developers.md @@ -169,3 +169,28 @@ This means you're running out of space on your disk to build. 
Please free up som #### `Build failed` for task `./gradlew :datahub-frontend:dist -x yarnTest -x yarnLint` This could mean that you need to update your [Yarn](https://yarnpkg.com/getting-started/install) version + +#### `:buildSrc:compileJava` task fails with `package com.linkedin.metadata.models.registry.config does not exist` and `cannot find symbol` error for `Entity` + +There are currently two symbolic links within the [buildSrc](https://github.com/datahub-project/datahub/tree/master/buildSrc) directory for the [com.linkedin.metadata.aspect.plugins.config](https://github.com/datahub-project/datahub/blob/master/buildSrc/src/main/java/com/linkedin/metadata/aspect/plugins/config) and +[com.linkedin.metadata.models.registry.config](https://github.com/datahub-project/datahub/blob/master/buildSrc/src/main/java/com/linkedin/metadata/models/registry/config) packages, which point to the corresponding packages in the [entity-registry](https://github.com/datahub-project/datahub/tree/master/entity-registry) subproject. + +When the repository is checked out using Windows 10/11 - even if WSL is later used for building using the mounted Windows filesystem in `/mnt/` - the symbolic links might not have been created correctly; instead, the symbolic links were checked out as plain files. Although it is technically possible to use the mounted Windows filesystem in `/mnt/` for building in WSL, it is **strongly recommended** to check out the repository within the Linux filesystem (e.g., in `/home/`) and build it from there, because accessing the Windows filesystem from Linux is relatively slow compared to the Linux filesystem and slows down the whole building process. + +To be able to create symbolic links in Windows 10/11 the [Developer Mode](https://learn.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development) has to be enabled first.
Then the following commands can be used to enable [symbolic links in Git](https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks) and recreate the symbolic links: + +```shell +# enable core.symlinks config +git config --global core.symlinks true + +# check the current core.symlinks config and scope +git config --show-scope --show-origin core.symlinks + +# in case the core.symlinks config is still set locally to false, remove the local config +git config --unset core.symlinks + +# reset the current branch to recreate the missing symbolic links (alternatively it is also possible to switch branches away and back) +git reset --hard +``` + +See also [here](https://stackoverflow.com/questions/5917249/git-symbolic-links-in-windows/59761201#59761201) for more information on how to enable symbolic links on Windows 10/11 and Git. diff --git a/docs/domains.md b/docs/domains.md index afaec796d55df..98e2577387037 100644 --- a/docs/domains.md +++ b/docs/domains.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Domains +# Domains diff --git a/docs/features/dataset-usage-and-query-history.md b/docs/features/dataset-usage-and-query-history.md index 2d06b932572b7..37cbc16cfe74f 100644 --- a/docs/features/dataset-usage-and-query-history.md +++ b/docs/features/dataset-usage-and-query-history.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Dataset Usage & Query History +# Dataset Usage & Query History diff --git a/docs/glossary/business-glossary.md b/docs/glossary/business-glossary.md index e10cbed30b913..9c9daabcb94c7 100644 --- a/docs/glossary/business-glossary.md +++ b/docs/glossary/business-glossary.md @@ -4,7 +4,7 @@ title: Business Glossary import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Business Glossary +# Business Glossary diff --git a/docs/how/search.md b/docs/how/search.md index
0b718fc9dc77f..7012f5321f2ff 100644 --- a/docs/how/search.md +++ b/docs/how/search.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Search +# Search @@ -291,11 +291,11 @@ If enabled in #2 above, those queries will appear in the `should` section of the `boolean query`[[4](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-bool-query.html)]. 4. `functionScore` - The Elasticsearch `function score`[[5](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-function-score-query.html#score-functions)] section of the overall query. -### Examples +#### Examples These examples assume a match-all `queryRegex` of `.*` so that it would impact any search query for simplicity. -#### Example 1: Ranking By Tags/Terms +##### Example 1: Ranking By Tags/Terms Boost entities with tags of `primary` or `gold` and an example glossary term's uuid. @@ -327,7 +327,7 @@ queryConfigurations: boost_mode: multiply ``` -#### Example 2: Preferred Data Platform +##### Example 2: Preferred Data Platform Boost the `urn:li:dataPlatform:hive` platform. @@ -350,7 +350,7 @@ queryConfigurations: boost_mode: multiply ``` -#### Example 3: Exclusion & Bury +##### Example 3: Exclusion & Bury This configuration extends the 3 built-in queries with a rule to exclude `deprecated` entities from search results because they are not generally relevant as well as reduces the score of `materialized`. @@ -380,6 +380,73 @@ queryConfigurations: boost_mode: multiply ``` +### Search Autocomplete Configuration + +Similar to the options provided in the previous section for search configuration, there are autocomplete specific options +which can be configured. + +Note: The scoring functions defined in the previous section are inherited for autocomplete by default, unless +overrides are provided in the autocomplete section. 
+ +For the most part the configuration options are identical to the search customization options in the previous +section, however they are located under `autocompleteConfigurations` in the yaml configuration file. + +1. `queryRegex` - Responsible for selecting the search customization based on the [regex matching](https://www.w3schools.com/java/java_regex.asp) the search query string. + *The first match is applied.* +2. The following boolean enables/disables the function score inheritance from the normal search configuration: [`inheritFunctionScore`] + This flag will automatically be set to `false` when the `functionScore` section is provided. If set to `false` with no + `functionScore` provided, the default Elasticsearch `_score` is used. +3. Built-in query boolean - There is 1 built-in query which can be enabled/disabled: + the `default autocomplete query`, + enabled with the following boolean: + [`defaultQuery`] +4. `boolQuery` - The base Elasticsearch `boolean query`[[4](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-bool-query.html)]. + If enabled in #3 above, the built-in query will + appear in the `should` section of the `boolean query`[[4](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-bool-query.html)]. +5. `functionScore` - The Elasticsearch `function score`[[5](https://www.elastic.co/guide/en/elasticsearch/reference/7.17/query-dsl-function-score-query.html#score-functions)] section of the overall query. + +#### Examples + +These examples assume a match-all `queryRegex` of `.*` so that it would impact any search query for simplicity. Also +note that the `queryRegex` is applied individually for `queryConfigurations` and `autocompleteConfigurations` and they +do not have to be identical.
+ +##### Example 1: Exclude `deprecated` entities from autocomplete + +```yaml +autocompleteConfigurations: + - queryRegex: .* + defaultQuery: true + + boolQuery: + must: + - term: + deprecated: 'false' +``` + +##### Example 2: Override scoring for autocomplete + +```yaml +autocompleteConfigurations: + - queryRegex: .* + defaultQuery: true + + functionScore: + functions: + - filter: + term: + materialized: + value: true + weight: 1.1 + - filter: + term: + deprecated: + value: false + weight: 0.5 + score_mode: avg + boost_mode: multiply +``` + ## FAQ and Troubleshooting **How are the results ordered?** diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index eeb184ffadd48..ba4708002ed21 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -20,6 +20,10 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes +- #10419 - `aws_region` is now a required configuration in the DynamoDB connector. The connector will no longer loop through all AWS regions; instead, it will only use the region passed into the recipe configuration. +- #10389 - Custom validators, mutators, side-effects dropped a previously required constructor +- #10472 - `RVW` added as a FabricType. No rollbacks allowed once metadata with this fabric type is added without manual cleanups in databases.
+ ### Potential Downtime ### Deprecations diff --git a/docs/incidents/incidents.md b/docs/incidents/incidents.md index 5f51e421aad3b..578571289cd2e 100644 --- a/docs/incidents/incidents.md +++ b/docs/incidents/incidents.md @@ -4,7 +4,7 @@ description: This page provides an overview of working with the DataHub Incident import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About Incidents +# Incidents @@ -14,8 +14,8 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; A couple scenarios in which incidents can be useful are -1**Communicating Assets with Ongoing Issues**: You can mark a known-bad data asset as under an ongoing incident so consumers and stakeholders can be informed about the health status of a data asset via the DataHub UI. Moreover, they can follow the incident as it progresses toward resolution. -2**Pipeline Circuit Breaking (advanced):** You can use Incidents as a basis for orchestrating and blocking data pipelines that have inputs with active issues to avoid propagating bad data downstream. +1. **Communicating Assets with Ongoing Issues**: You can mark a known-bad data asset as under an ongoing incident so consumers and stakeholders can be informed about the health status of a data asset via the DataHub UI. Moreover, they can follow the incident as it progresses toward resolution. +2. **Pipeline Circuit Breaking (advanced):** You can use Incidents as a basis for orchestrating and blocking data pipelines that have inputs with active issues to avoid propagating bad data downstream. In the next section, we'll walk through how to diff --git a/docs/lineage/airflow.md b/docs/lineage/airflow.md index d501ea407c072..f0952309c328a 100644 --- a/docs/lineage/airflow.md +++ b/docs/lineage/airflow.md @@ -8,7 +8,7 @@ If you're looking to schedule DataHub ingestion using Airflow, see the guide on The DataHub Airflow plugin supports: -- Automatic column-level lineage extraction from various operators e.g. 
SQL operators (including `MySqlOperator`, `PostgresOperator`, `SnowflakeOperator`, and more), `S3FileTransformOperator`, and more. +- Automatic column-level lineage extraction from various operators e.g. SQL operators (including `MySqlOperator`, `PostgresOperator`, `SnowflakeOperator`, `BigQueryInsertJobOperator`, and more), `S3FileTransformOperator`, and more. - Airflow DAG and tasks, including properties, ownership, and tags. - Task run information, including task successes and failures. - Manual lineage annotations using `inlets` and `outlets` on Airflow operators. @@ -166,6 +166,7 @@ Supported operators: - `SQLExecuteQueryOperator`, including any subclasses. Note that in newer versions of Airflow (generally Airflow 2.5+), most SQL operators inherit from this class. - `AthenaOperator` and `AWSAthenaOperator` - `BigQueryOperator` and `BigQueryExecuteQueryOperator` +- `BigQueryInsertJobOperator` (incubating) - `MySqlOperator` - `PostgresOperator` - `RedshiftSQLOperator` @@ -224,6 +225,14 @@ class DbtOperator(BaseOperator): If you override the `pre_execute` and `post_execute` function, ensure they include the `@prepare_lineage` and `@apply_lineage` decorators respectively. Reference the [Airflow docs](https://airflow.apache.org/docs/apache-airflow/stable/administration-and-deployment/lineage.html#lineage) for more details. +### Custom Extractors + +Note: these are only supported in the v2 plugin. + +You can also create a custom extractor to extract lineage from any operator. This is useful if you're using a built-in Airflow operator for which we don't support automatic lineage extraction. + +See this [example PR](https://github.com/datahub-project/datahub/pull/10452) which adds a custom extractor for the `BigQueryInsertJobOperator` operator. + ## Emit Lineage Directly If you can't use the plugin or annotate inlets/outlets, you can also emit lineage using the `DatahubEmitterOperator`. 
diff --git a/docs/lineage/openlineage.md b/docs/lineage/openlineage.md new file mode 100644 index 0000000000000..0b9423bf2c4da --- /dev/null +++ b/docs/lineage/openlineage.md @@ -0,0 +1,92 @@ +# OpenLineage + +DataHub now supports [OpenLineage](https://openlineage.io/) integration. With this support, DataHub can ingest and display lineage information from various data processing frameworks, providing users with a comprehensive understanding of their data pipelines. + +## Features + +- **REST Endpoint Support**: DataHub now includes a REST endpoint that can understand OpenLineage events. This allows users to send lineage information directly to DataHub, enabling easy integration with various data processing frameworks. + +- **[Spark Event Listener Plugin](https://datahubproject.io/docs/metadata-integration/java/spark-lineage-beta)**: DataHub provides a Spark Event Listener plugin that seamlessly integrates OpenLineage's Spark plugin. This plugin enhances DataHub's OpenLineage support by offering additional features such as PathSpec support, column-level lineage, patch support and more. + +## OpenLineage Support with DataHub + +### 1. REST Endpoint Support + +DataHub's REST endpoint allows users to send OpenLineage events directly to DataHub. This enables easy integration with various data processing frameworks, providing users with a centralized location for viewing and managing data lineage information. + +With Spark and Airflow we recommend using the Spark Lineage or DataHub's Airflow plugin for tighter integration with DataHub. + +#### How to Use + +To send OpenLineage messages to DataHub using the REST endpoint, simply make a POST request to the following endpoint: + +``` +POST GMS_SERVER_HOST:GMS_PORT/openapi/openlineage/api/v1/lineage +``` + +Include the OpenLineage message in the request body in JSON format.
+ +Example: + +```json +{ + "eventType": "START", + "eventTime": "2020-12-28T19:52:00.001+10:00", + "run": { + "runId": "d46e465b-d358-4d32-83d4-df660ff614dd" + }, + "job": { + "namespace": "workshop", + "name": "process_taxes" + }, + "inputs": [ + { + "namespace": "postgres://workshop-db:None", + "name": "workshop.public.taxes", + "facets": { + "dataSource": { + "_producer": "https://github.com/OpenLineage/OpenLineage/tree/0.10.0/integration/airflow", + "_schemaURL": "https://raw.githubusercontent.com/OpenLineage/OpenLineage/main/spec/OpenLineage.json#/definitions/DataSourceDatasetFacet", + "name": "postgres://workshop-db:None", + "uri": "workshop-db" + } + } + } + ], + "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client" +} +``` +##### How to set up Airflow +Follow the Airflow guide to set up the Airflow DAGs to send lineage information to DataHub. The guide can be found [here](https://airflow.apache.org/docs/apache-airflow-providers-openlineage/stable/guides/user.html). +The transport should look like this: +```json +{"type": "http", + "url": "https://GMS_SERVER_HOST:GMS_PORT/openapi/openlineage/", + "endpoint": "api/v1/lineage", + "auth": { + "type": "api_key", + "api_key": "your-datahub-api-key" + } +} +``` + +#### Known Limitations +With Spark and Airflow we recommend using the Spark Lineage or DataHub's Airflow plugin for tighter integration with DataHub. + +- **[PathSpec](https://datahubproject.io/docs/metadata-integration/java/spark-lineage-beta/#configuring-hdfs-based-dataset-urns) Support**: While the REST endpoint supports OpenLineage messages, full [PathSpec](https://datahubproject.io/docs/metadata-integration/java/spark-lineage-beta/#configuring-hdfs-based-dataset-urns) support is not yet available. + +- **Column-level Lineage**: DataHub's current OpenLineage support does not provide full column-level lineage tracking. +- etc... +### 2.
Spark Event Listener Plugin + +DataHub's Spark Event Listener plugin enhances OpenLineage support by providing additional features such as PathSpec support, column-level lineage, and more. + +#### How to Use + +Follow the guides of the Spark Lineage plugin page for more information on how to set up the Spark Lineage plugin. The guide can be found [here](https://datahubproject.io/docs/metadata-integration/java/spark-lineage-beta) + +## References + +- [OpenLineage](https://openlineage.io/) +- [DataHub OpenAPI Guide](../api/openapi/openapi-usage-guide.md) +- [DataHub Spark Lineage Plugin](https://datahubproject.io/docs/metadata-integration/java/spark-lineage-beta) diff --git a/docs/managed-datahub/approval-workflows.md b/docs/managed-datahub/approval-workflows.md index 3853a7c37817f..75cab458d285d 100644 --- a/docs/managed-datahub/approval-workflows.md +++ b/docs/managed-datahub/approval-workflows.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Approval Workflows +# Approval Workflows diff --git a/docs/managed-datahub/datahub-api/entity-events-api.md b/docs/managed-datahub/datahub-api/entity-events-api.md index ebc3bb97f9554..e59f1650c7d76 100644 --- a/docs/managed-datahub/datahub-api/entity-events-api.md +++ b/docs/managed-datahub/datahub-api/entity-events-api.md @@ -352,7 +352,7 @@ This event is emitted when a description has been added to an entity on DataHub. #### Header -
CategoryOperationEntity Types
DOCUMENTATIONADDdataset, dashboard, chart, dataJob, dataFlow , container, glossaryTerm, domain, tag
+
CategoryOperationEntity Types
DOCUMENTATIONADDdataset, dashboard, chart, dataJob, dataFlow , container, glossaryTerm, domain, tag, schemaField
#### Parameters @@ -384,7 +384,7 @@ This event is emitted when an existing description has been removed from an enti #### Header -
CategoryOperationEntity Types
DOCUMENTATIONREMOVEdataset, dashboard, chart, dataJob, container ,dataFlow , glossaryTerm, domain, tag
+
CategoryOperationEntity Types
DOCUMENTATIONREMOVEdataset, dashboard, chart, dataJob, container ,dataFlow , glossaryTerm, domain, tag, schemaField
#### Parameters diff --git a/docs/managed-datahub/observe/column-assertions.md b/docs/managed-datahub/observe/column-assertions.md index a0ca8abe26d51..3e5b903dc60ba 100644 --- a/docs/managed-datahub/observe/column-assertions.md +++ b/docs/managed-datahub/observe/column-assertions.md @@ -8,11 +8,8 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -> ⚠️ The **Column Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only -> be available to a limited set of design partners. -> -> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success -> representative. +> The **Column Assertions** feature is available as part of the **Acryl Observe** module of Acryl Cloud. +> If you are interested in learning more about **Acryl Observe** or trying it out, please [visit our website](https://www.acryldata.io/observe). ## Introduction @@ -36,6 +33,7 @@ Column Assertions are currently supported for: 2. Redshift 3. BigQuery 4. Databricks +5. DataHub Dataset Profile Metrics (collected via ingestion) Note that an Ingestion Source _must_ be configured with the data platform of your choice in Acryl DataHub's **Ingestion** tab. @@ -118,10 +116,11 @@ another always-increasing number - that can be used to find the "new rows" that ### Prerequisites 1. **Permissions**: To create or delete Column Assertions for a specific entity on DataHub, you'll need to be granted the - `Edit Assertions` and `Edit Monitors` privileges for the entity. This is granted to Entity owners by default. + `Edit Assertions` and `Edit Monitors` privileges for the entity. This will be granted to Entity owners as part of the `Asset Owners - Metadata Policy` + by default. -2. **Data Platform Connection**: In order to create a Column Assertion, you'll need to have an **Ingestion Source** - configured to your Data Platform: Snowflake, BigQuery, Redshift, or Databricks under the **Ingestion** tab. +2. 
(Optional) **Data Platform Connection**: In order to create a Column Assertion that queries the data source directly (instead of DataHub metadata), you'll need to have an **Ingestion Source** + configured to your Data Platform: Snowflake, BigQuery, or Redshift under the **Ingestion** tab. Once these are in place, you're ready to create your Column Assertions! @@ -130,14 +129,14 @@ Once these are in place, you're ready to create your Column Assertions! 1. Navigate to the Table that you want to monitor 2. Click the **Validations** tab -

+

3. Click **+ Create Assertion** -

- +

+

4. Choose **Column** @@ -149,15 +148,15 @@ Once these are in place, you're ready to create your Column Assertions! **Column Value** assertions are used to monitor the value of a specific column in a table, and ensure that every row adheres to a specific condition. **Column Metric** assertions are used to compute a metric for that column, and then compare the value of that metric to your expectations. -

- +

+

7. Configure the **column selection**. This defines the column that should be monitored by the Column Assertion. You can choose from any of the columns from the table listed in the dropdown. -

- +

+

8. Configure the **evaluation criteria**. This step varies based on the type of assertion you chose in the previous step. @@ -186,7 +185,7 @@ Once these are in place, you're ready to create your Column Assertions! have changed. A **High Watermark Column** is a column that contains a constantly-incrementing value - a date, a time, or another always-increasing number. When selected, a query will be issued to the table find only the rows which have changed since the last assertion run. -

+

@@ -206,11 +205,10 @@ Once these are in place, you're ready to create your Column Assertions! assertion. This is useful if you want to limit the assertion to a subset of rows in the table. Note this option will not be available if you choose **DataHub Dataset Profile** as the **source**. -11. Click **Next** -12. Configure actions that should be taken when the Column Assertion passes or fails +11. Configure actions that should be taken when the Column Assertion passes or fails

- +

- **Raise incident**: Automatically raise a new DataHub `Column` Incident for the Table whenever the Column Assertion is failing. This @@ -219,48 +217,40 @@ Once these are in place, you're ready to create your Column Assertions! - **Resolve incident**: Automatically resolved any incidents that were raised due to failures in this Column Assertion. Note that any other incidents will not be impacted. -10. Click **Save**. +12. Click **Next** and then **Save**. And that's it! DataHub will now begin to monitor your Column Assertion for the table. -To view the time of the next Column Assertion evaluation, simply click **Column** and then click on your -new Assertion: - -

- -

- Once your assertion has run, you will begin to see Success or Failure status for the Table -

- +

+

## Stopping a Column Assertion -In order to temporarily stop the evaluation of a Column Assertion: +In order to temporarily stop the evaluation of the assertion: -1. Navigate to the **Validations** tab of the table with the assertion -2. Click **Column** to open the Column Assertions list -3. Click the three-dot menu on the right side of the assertion you want to disable -4. Click **Stop** +1. Navigate to the **Validations** tab of the Table with the assertion +2. Click **Column** to open the Column Assertion assertions +3. Click the "Stop" button for the assertion you wish to pause.

- +

-To resume the Column Assertion, simply click **Turn On**. +To resume the assertion, simply click **Start**. -

- +

+

+ ## Creating Column Assertions via API -Under the hood, Acryl DataHub implements Column Assertion Monitoring using two "entity" concepts: +Under the hood, Acryl DataHub implements Column Assertion Monitoring using two concepts: - **Assertion**: The specific expectation for the column metric. e.g. "The value of an integer column is greater than 10 for all rows in the table." This is the "what". - - **Monitor**: The process responsible for evaluating the Assertion on a given evaluation schedule and using specific mechanisms. This is the "how". @@ -269,80 +259,13 @@ Note that to create or delete Assertions and Monitors for a specific entity on D #### GraphQL -In order to create a Column Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 -GraphQL mutation queries to create a Column Assertion entity and create an Assertion Monitor entity responsible for evaluating it. - -Start by creating the Column Assertion entity using the `createFieldAssertion` query and hang on to the 'urn' field of the Assertion entity -you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor`. +In order to create or update a Column Assertion, you can use the `upsertDatasetFieldAssertionMonitor` mutation.
##### Examples -To create a Column Assertion Entity that checks that the value of an integer column is greater than 10: - -```json -mutation createFieldAssertion { - createFieldAssertion( - input: { - entityUrn: "", - type: FIELD_VALUES, - fieldValuesAssertion: { - field: { - path: "", - type: "NUMBER", - nativeType: "NUMBER(38,0)" - }, - operator: GREATER_THAN, - parameters: { - value: { - type: NUMBER, - value: "10" - } - }, - failThreshold: { - type: COUNT, - value: 0 - }, - excludeNulls: true - } - } - ) { - urn -} -} -``` - -To create an Assertion Monitor Entity that evaluates the column assertion every 8 hours using all rows in the table: +Creating a Field Values Column Assertion that runs every 8 hours: -```json -mutation createAssertionMonitor { - createAssertionMonitor( - input: { - entityUrn: "", - assertionUrn: "", - schedule: { - cron: "0 */8 * * *", - timezone: "America/Los_Angeles" - }, - parameters: { - type: DATASET_FIELD, - datasetFieldParameters: { - sourceType: ALL_ROWS_QUERY - } - } - } - ) { - urn - } -} -``` - -This entity defines _when_ to run the check (Using CRON format - every 8th hour) and _how_ to run the check (using a query against all rows of the table). - -After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. - -Alternatively you can use `upsertDatasetFieldAssertionMonitor` graphql endpoint for creating a Column Assertion and corresponding Monitor for a dataset. - -```json +```graphql mutation upsertDatasetFieldAssertionMonitor { upsertDatasetFieldAssertionMonitor( input: { @@ -376,7 +299,7 @@ mutation upsertDatasetFieldAssertionMonitor { } mode: ACTIVE } - ){ + ) { urn } } @@ -384,7 +307,7 @@ mutation upsertDatasetFieldAssertionMonitor { You can use same endpoint with assertion urn input to update an existing Column Assertion and corresponding Monitor. 
-```json +```graphql mutation upsertDatasetFieldAssertionMonitor { upsertDatasetFieldAssertionMonitor( assertionUrn: "" @@ -419,7 +342,7 @@ mutation upsertDatasetFieldAssertionMonitor { } mode: ACTIVE } - ){ + ) { urn } } diff --git a/docs/managed-datahub/observe/custom-sql-assertions.md b/docs/managed-datahub/observe/custom-sql-assertions.md index adf1c8bd44c8b..ce6e46cfda1fd 100644 --- a/docs/managed-datahub/observe/custom-sql-assertions.md +++ b/docs/managed-datahub/observe/custom-sql-assertions.md @@ -8,12 +8,8 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; - -> ⚠️ The **Custom SQL Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only be available to a -> limited set of design partners. -> -> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success -> representative. +> The **Custom SQL Assertions** feature is available as part of the **Acryl Observe** module of Acryl Cloud. +> If you are interested in learning more about **Acryl Observe** or trying it out, please [visit our website](https://www.acryldata.io/observe). ## Introduction @@ -72,8 +68,7 @@ At the most basic level, **Custom SQL Assertions** consist of a few important pa 1. An **Evaluation Schedule** 2. A **Query** -3. An **Condition Type** -4. An **Assertion Description** +3. A **Condition Type** In this section, we'll give an overview of each. @@ -96,6 +91,7 @@ Use the "Try it out" button to test your query and ensure that it returns a sing #### 3. Condition Type The **Condition Type**: This defines the conditions under which the Assertion will **fail**. 
The list of supported operations is: + - **Is Equal To**: The assertion will fail if the query result is equal to the configured value - **Is Not Equal To**: The assertion will fail if the query result is not equal to the configured value - **Is Greater Than**: The assertion will fail if the query result is greater than the configured value @@ -107,17 +103,14 @@ The **Condition Type**: This defines the conditions under which the Assertion wi Custom SQL Assertions also have an off switch: they can be started or stopped at any time with the click of button. -#### 4. Assertion Description - -The **Assertion Description**: This is a human-readable description of the Assertion. It should be used to describe the meaning of the Assertion, and can be used to provide additional context to users who are viewing the Assertion. - ## Creating a Custom SQL Assertion ### Prerequisites 1. **Permissions**: To create or delete Custom SQL Assertions for a specific entity on DataHub, you'll need to be granted the - `Edit Assertions`, `Edit Monitors`, **and the additional `Edit SQL Assertion Monitors`** privileges for the entity. This is granted to Entity owners by default. + `Edit Assertions`, `Edit Monitors`, **and the additional `Edit SQL Assertion Monitors`** privileges for the entity. This will be granted to Entity owners as part of the `Asset Owners - Metadata Policy` + by default. 2. **Data Platform Connection**: In order to create a Custom SQL Assertion, you'll need to have an **Ingestion Source** configured to your Data Platform: Snowflake, BigQuery, Redshift, or Databricks under the **Integrations** tab. @@ -129,14 +122,14 @@ Once these are in place, you're ready to create your Custom SQL Assertions! 1. Navigate to the Table you want to monitor 2. Click the **Validations** tab -

- +

+

3. Click **+ Create Assertion** -

- +

+

4. Choose **Custom** @@ -146,82 +139,70 @@ Once these are in place, you're ready to create your Custom SQL Assertions! 6. Provide a SQL **query** that will be used to evaluate the Table. The query should return a single row with a single column. Currently only numeric values are supported (integer and floats). The query can be as simple or as complex as you'd like, and can use any SQL features supported by your Data Warehouse. Make sure to use the fully qualified name of the Table in your query. -

- +

+

7. Configure the evaluation **condition type**. This determines the cases in which the new assertion will fail when it is evaluated. -

- -

- -8. Add a **description** for the assertion. This is a human-readable description of the Assertion. It should be used to describe the meaning of the Assertion, and can be used to provide additional context to users who are viewing the Assertion. - -

- -

- -9. (Optional) Use the **Try it out** button to test your query and ensure that it returns a single row with a single column, and passes the configured condition type. - -

- +

+

-10. Click **Next** -11. Configure actions that should be taken when the Custom SQL Assertion passes or fails +8. Configure actions that should be taken when the Custom SQL Assertion passes or fails

- +

- **Raise incident**: Automatically raise a new DataHub Incident for the Table whenever the Custom SQL Assertion is failing. This may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when an incident is created due to an Assertion failure. + - **Resolve incident**: Automatically resolved any incidents that were raised due to failures in this Custom SQL Assertion. Note that any other incidents will not be impacted. -1. Click **Save**. -And that's it! DataHub will now begin to monitor your Custom SQL Assertion for the table. - -To view the time of the next Custom SQL Assertion evaluation, simply click **Custom** and then click on your -new Assertion: +9. (Optional) Use the **Try it out** button to test your query and ensure that it returns a single row with a single column, and passes the configured condition type. -

- +

+

+10. Click **Next** and then add a description. + +11. Click **Save** + +And that's it! DataHub will now begin to monitor your Custom SQL Assertion for the table. + Once your assertion has run, you will begin to see Success or Failure status for the Table -

- +

+

## Stopping a Custom SQL Assertion -In order to temporarily stop the evaluation of a Custom SQL Assertion: +In order to temporarily stop the evaluation of the assertion: 1. Navigate to the **Validations** tab of the Table with the assertion -2. Click **Custom** to open the Custom SQL Assertions list -3. Click the three-dot menu on the right side of the assertion you want to disable -4. Click **Stop** +2. Click **Custom SQL** to open the Custom SQL Assertions list +3. Click the "Stop" button for the assertion you wish to pause.

- +

-To resume the Custom SQL Assertion, simply click **Turn On**. +To resume the assertion, simply click **Start**. -

- +

+

- ## Creating Custom SQL Assertions via API -Under the hood, Acryl DataHub implements Custom SQL Assertion Monitoring using two "entity" concepts: +Under the hood, Acryl DataHub implements Custom SQL Assertion Monitoring using two concepts: - **Assertion**: The specific expectation for the custom assertion, e.g. "The table was changed in the past 7 hours" or "The table is changed on a schedule of every day by 8am". This is the "what". @@ -234,72 +215,13 @@ Note that to create or delete Assertions and Monitors for a specific entity on D #### GraphQL -In order to create a Custom SQL Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 -GraphQL mutation queries to create a Custom SQL Assertion entity and create an Assertion Monitor entity responsible for evaluating it. - -Start by creating the Custom SQL Assertion entity using the `createSqlAssertion` query and hang on to the 'urn' field of the Assertion entity -you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor`. +In order to create or update a Custom SQL Assertion, you can use the `upsertDatasetSqlAssertionMonitor` mutation. ##### Examples -To create a Custom SQL Assertion Entity that checks whether a query result is greater than 100: - -```json -mutation createSqlAssertion { - createSqlAssertion( - input: { - entityUrn: "", - type: METRIC, - description: "", - statement: "", - operator: GREATER_THAN, - parameters: { - value: { - value: "100", - type: NUMBER - } - } - } - ) { - urn - } -} -``` - -The supported assertion types are `METRIC` and `METRIC_CHANGE`. If you choose `METRIC_CHANGE`, -you will need to provide a `changeType` parameter with either `ABSOLUTE` or `PERCENTAGE` values. -The supported operator types are `EQUAL_TO`, `NOT_EQUAL_TO`, `GREATER_THAN`, `GREATER_THAN_OR_EQUAL_TO`, `LESS_THAN`, `LESS_THAN_OR_EQUAL_TO`, and `BETWEEN` (requires minValue, maxValue). -The supported parameter types are `NUMBER`. 
- -To create an Assertion Monitor Entity that evaluates the custom assertion every 8 hours: - -```json -mutation createAssertionMonitor { - createAssertionMonitor( - input: { - entityUrn: "", - assertionUrn: "", - schedule: { - cron: "0 */8 * * *", - timezone: "America/Los_Angeles" - }, - parameters: { - type: DATASET_SQL - } - } - ) { - urn - } -} -``` - -This entity defines _when_ to run the check (Using CRON format - every 8th hour) and _how_ to run the check (using the Information Schema). - -After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. - -Alternatively you can use `upsertDatasetSqlAssertionMonitor` graphql endpoint for creating a Custom SQL Assertion and corresponding Monitor for a dataset. +To create a Custom SQL Assertion Entity that checks whether a query result is greater than 100 that runs every 8 hours: -```json +```graphql mutation upsertDatasetSqlAssertionMonitor { upsertDatasetSqlAssertionMonitor( input: { @@ -328,7 +250,7 @@ mutation upsertDatasetSqlAssertionMonitor { You can use same endpoint with assertion urn input to update an existing Custom SQL Assertion and corresponding Monitor. -```json +```graphql mutation upsertDatasetSqlAssertionMonitor { upsertDatasetSqlAssertionMonitor( assertionUrn: "" diff --git a/docs/managed-datahub/observe/freshness-assertions.md b/docs/managed-datahub/observe/freshness-assertions.md index 14ff828dc7376..5e80c9dd940dc 100644 --- a/docs/managed-datahub/observe/freshness-assertions.md +++ b/docs/managed-datahub/observe/freshness-assertions.md @@ -8,12 +8,8 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; - -> ⚠️ The **Freshness Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only be available to a -> limited set of design partners. -> -> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success -> representative. 
+> The **Freshness Assertions** feature is available as part of the **Acryl Observe** module of Acryl Cloud. +> If you are interested in learning more about **Acryl Observe** or trying it out, please [visit our website](https://www.acryldata.io/observe). ## Introduction @@ -44,6 +40,7 @@ Freshness Assertions are currently supported for: 2. Redshift 3. BigQuery 4. Databricks +5. DataHub Operations (collected via ingestion) Note that an Ingestion Source _must_ be configured with the data platform of your choice in Acryl DataHub's **Ingestion** tab. @@ -144,10 +141,12 @@ Freshness Assertions also have an off switch: they can be started or stopped at ### Prerequisites 1. **Permissions**: To create or delete Freshness Assertions for a specific entity on DataHub, you'll need to be granted the -`Edit Assertions` and `Edit Monitors` privileges for the entity. This is granted to Entity owners by default. +`Edit Assertions` and `Edit Monitors` privileges for the entity. This will be granted to Entity owners as part of the `Asset Owners - Metadata Policy` +by default. -2. **Data Platform Connection**: In order to create a Freshness Assertion, you'll need to have an **Ingestion Source** configured to your -Data Platform: Snowflake, BigQuery, Redshift, or Databricks under the **Integrations** tab. +2. (Optional) **Data Platform Connection**: In order to create a Freshness Assertion that queries the source data platform directly (instead of DataHub metadata), you'll need to have an **Ingestion Source** configured to your +Data Platform: Snowflake, BigQuery, or Redshift under the **Integrations** tab. + Once these are in place, you're ready to create your Freshness Assertions! @@ -156,14 +155,14 @@ Once these are in place, you're ready to create your Freshness Assertions! 1. Navigate to the Table that to monitor for freshness 2. Click the **Validations** tab -

- +

+

3. Click **+ Create Assertion** -

- +

+

4. Choose **Freshness** @@ -176,22 +175,22 @@ or _In the past X hours_ to configure a fixed interval that is used when checkin _Check whether the table has changed between subsequent evaluations of the check_ -

- +

+

_Check whether the table has changed in a specific window of time_ -

- +

+

7. (Optional) Click **Advanced** to customize the evaluation **source**. This is the mechanism that will be used to evaluate the check. Each Data Platform supports different options including Audit Log, Information Schema, Last Modified Column, High Watermark Column, and DataHub Operation. -

- +

+

- **Audit Log**: Check the Data Platform operational audit log to determine whether the table changed within the evaluation period. @@ -203,54 +202,48 @@ the check. Each Data Platform supports different options including Audit Log, In when using a fixed lookback period. - **DataHub Operation**: Use DataHub Operations to determine whether the table changed within the evaluation period. -1. Click **Next** -2. Configure actions that should be taken when the Freshness Assertion passes or fails +8. Configure actions that should be taken when the Freshness Assertion passes or fails

- +

- **Raise incident**: Automatically raise a new DataHub `Freshness` Incident for the Table whenever the Freshness Assertion is failing. This may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when - an incident is created due to an Assertion failure. + an incident is created due to an Assertion failure. + - **Resolve incident**: Automatically resolved any incidents that were raised due to failures in this Freshness Assertion. Note that - any other incidents will not be impacted. + any other incidents will not be impacted. + +9. Click **Next** and add a description. 10. Click **Save**. And that's it! DataHub will now begin to monitor your Freshness Assertion for the table. -To view the time of the next Freshness Assertion evaluation, simply click **Freshness** and then click on your -new Assertion: - -

- -

- Once your assertion has run, you will begin to see Success or Failure status for the Table -

- +

+

## Stopping a Freshness Assertion -In order to temporarily stop the evaluation of a Freshness Assertion: +In order to temporarily stop the evaluation of the assertion: 1. Navigate to the **Validations** tab of the Table with the assertion -2. Click **Freshness** to open the Freshness Assertions list -3. Click the three-dot menu on the right side of the assertion you want to disable -4. Click **Stop** +2. Click **Freshness** to open the Freshness Assertions list +3. Click the "Stop" button for the assertion you wish to pause.

- +

-To resume the Freshness Assertion, simply click **Turn On**. +To resume the assertion, simply click **Start**. -

- +

+

@@ -263,7 +256,7 @@ requiring any manual setup. If Acryl DataHub is able to detect a pattern in the change frequency of a Snowflake, Redshift, BigQuery, or Databricks Table, you'll find a recommended Smart Assertion under the `Validations` tab on the Table profile page: -

+

@@ -275,7 +268,7 @@ Don't need it anymore? Smart Assertions can just as easily be turned off by clic ## Creating Freshness Assertions via API -Under the hood, Acryl DataHub implements Freshness Assertion Monitoring using two "entity" concepts: +Under the hood, Acryl DataHub implements Freshness Assertion Monitoring using two concepts: - **Assertion**: The specific expectation for freshness, e.g. "The table was changed int the past 7 hours" or "The table is changed on a schedule of every day by 8am". This is the "what". @@ -288,67 +281,13 @@ Note that to create or delete Assertions and Monitors for a specific entity on D #### GraphQL -In order to create a Freshness Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 -GraphQL mutation queries to create a Freshness Assertion entity and create an Assertion Monitor entity responsible for evaluating it. - -Start by creating the Freshness Assertion entity using the `createFreshnessAssertion` query and hang on to the 'urn' field of the Assertion entity -you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor`. +In order to create or update a Freshness Assertion, you can use the `upsertDatasetFreshnessAssertionMonitor` mutation. ##### Examples -To create a Freshness Assertion Entity that checks whether a table has been updated in the past 8 hours: - -```json -mutation createFreshnessAssertion { - createFreshnessAssertion( - input: { - entityUrn: "", - type: DATASET_CHANGE, - schedule: { - type: FIXED_INTERVAL, - fixedInterval: { unit: HOUR, multiple: 8 } - } - } - ) { - urn - } -} -``` - -This defines the user's expectation: that the table should have changed in the past 8 hours whenever the assertion is evaluated. 
+To create a Freshness Assertion Entity that checks whether a table has been updated in the past 8 hours, and runs every 8 hours: -To create an Assertion Monitor Entity that evaluates the assertion every 8 hours using the Audit Log: - -```json -mutation createAssertionMonitor { - createAssertionMonitor( - input: { - entityUrn: "", - assertionUrn: "", - schedule: { - cron: "0 */8 * * *", - timezone: "America/Los_Angeles" - }, - parameters: { - type: DATASET_FRESHNESS, - datasetFreshnessParameters: { - sourceType: AUDIT_LOG, - } - } - } - ) { - urn - } -} -``` - -This entity defines _when_ to run the check (Using CRON format - every 8th hour) and _how_ to run the check (using the Audit Log). - -After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. - -Alternatively you can use `upsertDatasetFreshnessAssertionMonitor` graphql endpoint for creating a Freshness Assertion and corresponding Monitor for a dataset. - -```json +```graphql mutation upsertDatasetFreshnessAssertionMonitor { upsertDatasetFreshnessAssertionMonitor( input: { @@ -366,15 +305,15 @@ mutation upsertDatasetFreshnessAssertionMonitor { } mode: ACTIVE } - ){ + ) { urn } } ``` -You can use same endpoint with assertion urn input to update an existing Freshness Assertion and corresponding Monitor. +You can use the same endpoint with assertion urn input to update an existing Freshness Assertion and corresponding Monitor: -```json +```graphql mutation upsertDatasetFreshnessAssertionMonitor { upsertDatasetFreshnessAssertionMonitor( assertionUrn: "" @@ -393,7 +332,7 @@ mutation upsertDatasetFreshnessAssertionMonitor { } mode: ACTIVE } - ){ + ) { urn } } @@ -408,7 +347,7 @@ to capture changes, or where the data platform's mechanism is not reliable. 
In o ##### Examples -```json +```graphql mutation reportOperation { reportOperation( input: { diff --git a/docs/managed-datahub/observe/schema-assertions.md b/docs/managed-datahub/observe/schema-assertions.md new file mode 100644 index 0000000000000..b7869218932d8 --- /dev/null +++ b/docs/managed-datahub/observe/schema-assertions.md @@ -0,0 +1,290 @@ +--- +description: This page provides an overview of working with DataHub Schema Assertions +--- +import FeatureAvailability from '@site/src/components/FeatureAvailability'; + + +# Schema Assertions + + + +> The **Schema Assertions** feature is available as part of the **Acryl Observe** module of Acryl Cloud. +> If you are interested in learning more about **Acryl Observe** or trying it out, please [visit our website](https://www.acryldata.io/observe). + +## Introduction + +Can you remember a time when columns were unexpectedly added, removed, or altered for a key Table in your Data Warehouse? +Perhaps this caused downstream tables, views, dashboards, data pipelines, or AI models to break. + +There are many reasons why the structure of an important Table on Snowflake, Redshift, or BigQuery may change, breaking the expectations +of downstream consumers of the table. + +What if you could reduce the time to detect these incidents, so that the people responsible for the data were made aware of data +issues _before_ anyone else? With Acryl DataHub **Schema Assertions**, you can. + +Acryl DataHub allows users to define expectations about a table's columns and their data types, and will monitor and validate these expectations over +time, notifying you when a breaking change occurs. + +In this article, we'll cover the basics of monitoring Schema Assertions - what they are, how to configure them, and more - so that you and your team can +start building trust in your most important data assets. + +Let's get started! 
+ +## Support + +Schema Assertions are currently supported for all data sources that provide a schema via the normal ingestion process. + +## What is a Schema Assertion? + +A **Schema Assertion** is a Data Quality rule used to monitor the columns in a particular table and their data types. +They allow you to define a set of "required" columns for the table along with their expected types, and then be notified +if anything changes via a failing assertion. + +This type of assertion can be particularly useful if you want to monitor the structure of a table which is outside of your +direct control, for example the result of an ETL process from an upstream application or tables provided by a 3rd party data vendor. It +allows you to get ahead of potentially breaking schema changes, by alerting you as soon as they occur, and before +they have a chance to negatively impact downstream assets. + +### Anatomy of a Schema Assertion + +At the most basic level, **Schema Assertions** consist of a few important parts: + +1. A **Condition Type** +2. A set of **Expected Columns** + +In this section, we'll give an overview of each. + +#### 1. Condition Type + +The **Condition Type** defines the conditions under which the Assertion will **fail**. More concretely, it determines +how the _expected_ columns should be compared to the _actual_ columns found in the schema to determine a passing or failing +state for the data quality check. + +The list of supported condition types: + +- **Contains**: The assertion will fail if the actual schema does not contain all expected columns and their types. +- **Exact Match**: The assertion will fail if the actual schema does not EXACTLY match the expected columns and their types. No + additional columns will be permitted. + +Schema Assertions will be evaluated whenever a change in the schema of the underlying table is detected. 
+They also have an off switch: they can be started or stopped at any time by pressing the start (play) or stop (pause) buttons. + + +#### 2. Expected Columns + +The **Expected Columns** are a set of column **names** along with their high-level **data +types** that should be used to compare against the _actual_ columns found in the table. By default, the expected column +set will be derived from the current set of columns found in the table. This conveniently allows you to "freeze" or "lock" +the current schema of a table in just a few clicks. + +Each "expected column" is composed of a + +1. **Name**: The name of the column that should be present in the table. Nested columns are supported in a flattened + fashion by simply providing a dot-separated path to the nested column. For example, `user.id` would be a nested column `id`. + In the case of a complex array or map, each field in the elements of the array or map will be treated as dot-delimited columns. + Note that verifying the specific type of object in primitive arrays or maps is not currently supported. Note that the comparison performed + is currently not case-sensitive. + +2. **Type**: The high-level data type of the column in the table. This type is intentionally "high level" to allow for normal column widening practices + without the risk of failing the assertion unnecessarily. For example a `varchar(64)` and a `varchar(256)` will both resolve to the same high-level + "STRING" type. The currently supported set of data types includes the following: + + - String + - Number + - Boolean + - Date + - Timestamp + - Struct + - Array + - Map + - Union + - Bytes + - Enum + +## Creating a Schema Assertion + +### Prerequisites + +- **Permissions**: To create or delete Schema Assertions for a specific entity on DataHub, you'll need to be granted the + `Edit Assertions` and `Edit Monitors` privileges for the entity. This will be granted to Entity owners as part of the `Asset Owners - Metadata Policy` + by default. 
+ +Once these are in place, you're ready to create your Schema Assertions! + +### Steps + +1. Navigate to the Table you want to monitor +2. Click the **Validations** tab + +

+ +

+ +3. Click **+ Create Assertion** + +

+ +

+ +4. Choose **Schema** + +5. Select the **condition type**. + +6. Define the **expected columns** that will be continually compared against the actual column set. This defaults to the current columns for the table. + +

+ +

+ +7. Configure actions that should be taken when the assertion passes or fails + +

+ +

+ +- **Raise incident**: Automatically raise a new DataHub Incident for the Table whenever the Schema Assertion is failing. This + may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when + an incident is created due to an Assertion failure. + +- **Resolve incident**: Automatically resolve any incidents that were raised due to failures in this Schema Assertion. Note that + any other incidents will not be impacted. + +Then click **Next**. + +7. (Optional) Add a **description** for the assertion. This is a human-readable description of the assertion. If you do not provide one, a description will be generated for you. + +

+ +

+ +8. Click **Save**. + +And that's it! DataHub will now begin to monitor your Schema Assertion for the table. + +Once your assertion has run, you will begin to see Success or Failure status: + +

+ +

+ + +## Stopping a Schema Assertion + +In order to temporarily stop the evaluation of the assertion: + +1. Navigate to the **Validations** tab of the Table with the assertion +2. Click **Schema** to open the Schema Assertion +3. Click the "Stop" button. + +

+ +

+ +To resume the assertion, simply click **Start**. + +

+ +

+ + +## Creating Schema Assertions via API + +Note that to create or delete Assertions and Monitors for a specific entity on DataHub, you'll need the +`Edit Assertions` and `Edit Monitors` privileges to create a Schema Assertion via the API. + +#### GraphQL + +In order to create a Schema Assertion, you can use the `upsertDatasetSchemaAssertionMonitor` mutation. + +##### Examples + +To create a Schema Assertion that checks for the presence of a specific set of columns: + +```graphql +mutation upsertDatasetSchemaAssertionMonitor { + upsertDatasetSchemaAssertionMonitor( + input: { + entityUrn: "", + assertion: { + compatibility: SUPERSET, # How the actual columns will be compared against the expected fields (provided next) + fields: [ + { + path: "id", + type: STRING + }, + { + path: "count", + type: NUMBER + }, + { + path: "struct", + type: STRUCT + }, + { + path: "struct.nestedBooleanField", + type: BOOLEAN + } + ] + }, + description: "", + mode: ACTIVE + } + ) +} +``` + +The supported compatibility types are `EXACT_MATCH` and `SUPERSET` (Contains). + +You can use the same endpoint to update an existing Schema Assertion; simply add the `assertionUrn` field: + +```graphql +mutation upsertDatasetSchemaAssertionMonitor { + upsertDatasetSchemaAssertionMonitor( + assertionUrn: "urn:li:assertion:existing-assertion-id", + input: { + entityUrn: "", + assertion: { + compatibility: EXACT_MATCH, + fields: [ + { + path: "id", + type: STRING + }, + { + path: "count", + type: NUMBER + }, + { + path: "struct", + type: STRUCT + }, + { + path: "struct.nestedBooleanField", + type: BOOLEAN + } + ] + }, + description: "", + mode: ACTIVE + } + ) +} +``` + +You can delete assertions along with their monitors using GraphQL mutations: `deleteAssertion` and `deleteMonitor`. + +### Tips + +:::info +**Authorization** + +Remember to always provide a DataHub Personal Access Token when calling the GraphQL API. 
To do so, just add the 'Authorization' header as follows: + +``` +Authorization: Bearer +``` + +**Exploring GraphQL API** + +Also, remember that you can play with an interactive version of the Acryl GraphQL API at `https://your-account-id.acryl.io/api/graphiql` +::: diff --git a/docs/managed-datahub/observe/volume-assertions.md b/docs/managed-datahub/observe/volume-assertions.md index 2a98b37576c41..7d801933834ab 100644 --- a/docs/managed-datahub/observe/volume-assertions.md +++ b/docs/managed-datahub/observe/volume-assertions.md @@ -8,12 +8,8 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; - -> ⚠️ The **Volume Assertions** feature is currently in private beta, part of the **Acryl Observe** module, and may only be available to a -> limited set of design partners. -> -> If you are interested in trying it and providing feedback, please reach out to your Acryl Customer Success -> representative. +> The **Volume Assertions** feature is available as part of the **Acryl Observe** module of Acryl Cloud. +> If you are interested in learning more about **Acryl Observe** or trying it out, please [visit our website](https://www.acryldata.io/observe). ## Introduction @@ -45,6 +41,7 @@ Volume Assertions are currently supported for: 2. Redshift 3. BigQuery 4. Databricks +5. DataHub Dataset Profile (collected via ingestion) Note that an Ingestion Source _must_ be configured with the data platform of your choice in Acryl DataHub's **Ingestion** tab. @@ -137,10 +134,11 @@ Volume Assertions also have an off switch: they can be started or stopped at any ### Prerequisites 1. **Permissions**: To create or delete Volume Assertions for a specific entity on DataHub, you'll need to be granted the - `Edit Assertions` and `Edit Monitors` privileges for the entity. This is granted to Entity owners by default. + `Edit Assertions` and `Edit Monitors` privileges for the entity. 
This will be granted to Entity owners as part of the `Asset Owners - Metadata Policy` + by default. -2. **Data Platform Connection**: In order to create a Volume Assertion, you'll need to have an **Ingestion Source** configured to your - Data Platform: Snowflake, BigQuery, Redshift, or Databricks under the **Integrations** tab. +2. (Optional) **Data Platform Connection**: In order to create a Volume Assertion that queries the source data platform directly (instead of DataHub metadata), you'll need to have an **Ingestion Source** configured to your + Data Platform: Snowflake, BigQuery, or Redshift under the **Integrations** tab. Once these are in place, you're ready to create your Volume Assertions! @@ -149,14 +147,14 @@ Once these are in place, you're ready to create your Volume Assertions! 1. Navigate to the Table that to monitor for volume 2. Click the **Validations** tab -

- +

+

3. Click **+ Create Assertion** -

- +

+

4. Choose **Volume** @@ -166,69 +164,63 @@ Once these are in place, you're ready to create your Volume Assertions! 6. Configure the evaluation **condition type**. This determines the cases in which the new assertion will fail when it is evaluated. -

- +

+

7. (Optional) Click **Advanced** to customize the volume **source**. This is the mechanism that will be used to obtain the table row count metric. Each Data Platform supports different options including Information Schema, Query, and DataHub Dataset Profile. -

- +

+

- **Information Schema**: Check the Data Platform system metadata tables to determine the table row count. - **Query**: Issue a `COUNT(*)` query to the table to determine the row count. - **DataHub Dataset Profile**: Use the DataHub Dataset Profile metadata to determine the row count. -8. Click **Next** -9. Configure actions that should be taken when the Volume Assertion passes or fails +8. Configure actions that should be taken when the Volume Assertion passes or fails

- +

- **Raise incident**: Automatically raise a new DataHub `Volume` Incident for the Table whenever the Volume Assertion is failing. This may indicate that the Table is unfit for consumption. Configure Slack Notifications under **Settings** to be notified when an incident is created due to an Assertion failure. + - **Resolve incident**: Automatically resolve any incidents that were raised due to failures in this Volume Assertion. Note that any other incidents will not be impacted. +9. Click **Next** and provide a description. + 10. Click **Save**. And that's it! DataHub will now begin to monitor your Volume Assertion for the table. -To view the time of the next Volume Assertion evaluation, simply click **Volume** and then click on your -new Assertion: - -

- -

- Once your assertion has run, you will begin to see Success or Failure status for the Table -

- +

+

## Stopping a Volume Assertion -In order to temporarily stop the evaluation of a Volume Assertion: +In order to temporarily stop the evaluation of the assertion: 1. Navigate to the **Validations** tab of the Table with the assertion -2. Click **Volume** to open the Volume Assertions list -3. Click the three-dot menu on the right side of the assertion you want to disable -4. Click **Stop** +2. Click **Volume** to open the Volume Assertions list +3. Click the "Stop" button for the assertion you wish to pause.

- +

-To resume the Volume Assertion, simply click **Turn On**. +To resume the assertion, simply click **Start**. -

- +

+

@@ -241,7 +233,7 @@ requiring any manual setup. If Acryl DataHub is able to detect a pattern in the volume of a Snowflake, Redshift, BigQuery, or Databricks Table, you'll find a recommended Smart Assertion under the `Validations` tab on the Table profile page: -

+

@@ -253,7 +245,7 @@ Don't need it anymore? Smart Assertions can just as easily be turned off by clic ## Creating Volume Assertions via API -Under the hood, Acryl DataHub implements Volume Assertion Monitoring using two "entity" concepts: +Under the hood, Acryl DataHub implements Volume Assertion Monitoring using two concepts: - **Assertion**: The specific expectation for volume, e.g. "The table was changed int the past 7 hours" or "The table is changed on a schedule of every day by 8am". This is the "what". @@ -266,80 +258,13 @@ Note that to create or delete Assertions and Monitors for a specific entity on D #### GraphQL -In order to create a Volume Assertion that is being monitored on a specific **Evaluation Schedule**, you'll need to use 2 -GraphQL mutation queries to create a Volume Assertion entity and create an Assertion Monitor entity responsible for evaluating it. - -Start by creating the Volume Assertion entity using the `createVolumeAssertion` query and hang on to the 'urn' field of the Assertion entity -you get back. Then continue by creating a Monitor entity using the `createAssertionMonitor`. +In order to create or update a Volume Assertion, you can use the `upsertDatasetVolumeAssertionMonitor` mutation. ##### Examples -To create a Volume Assertion Entity that checks whether a table has been updated in the past 8 hours: - -```json -mutation createVolumeAssertion { - createVolumeAssertion( - input: { - entityUrn: "", - type: ROW_COUNT_TOTAL, - rowCountTotal: { - operator: BETWEEN, - parameters: { - minValue: { - "value": 10, - "type": NUMBER - }, - maxValue: { - "value": 20, - "type": NUMBER - } - } - } - } - ) { - urn -} -} -``` - -To create an assertion that specifies that the row count total should always fall between 10 and 20. +To create a Volume Assertion Entity that verifies that the row count for a table is between 10 and 20 rows, and runs every 8 hours: -The supported volume assertion types are `ROW_COUNT_TOTAL` and `ROW_COUNT_CHANGE`. 
Other (e.g. incrementing segment) types are not yet supported. -The supported operator types are `GREATER_THAN`, `GREATER_THAN_OR_EQUAL_TO`, `LESS_THAN`, `LESS_THAN_OR_EQUAL_TO`, and `BETWEEN` (requires minValue, maxValue). -The supported parameter types are `NUMBER`. - -To create an Assertion Monitor Entity that evaluates the volume assertion every 8 hours using the Information Schema: - -```json -mutation createAssertionMonitor { - createAssertionMonitor( - input: { - entityUrn: "", - assertionUrn: "", - schedule: { - cron: "0 */8 * * *", - timezone: "America/Los_Angeles" - }, - parameters: { - type: DATASET_VOLUME, - datasetVolumeParameters: { - sourceType: INFORMATION_SCHEMA, - } - } - } - ) { - urn - } -} -``` - -This entity defines _when_ to run the check (Using CRON format - every 8th hour) and _how_ to run the check (using the Information Schema). - -After creating the monitor, the new assertion will start to be evaluated every 8 hours in your selected timezone. - -Alternatively you can use `upsertDatasetVolumeAssertionMonitor` graphql endpoint for creating a Volume Assertion and corresponding Monitor. - -```json +```graphql mutation upsertDatasetVolumeAssertionMonitor { upsertDatasetVolumeAssertionMonitor( input: { @@ -373,9 +298,13 @@ mutation upsertDatasetVolumeAssertionMonitor { } ``` -You can use same endpoint with assertion urn input to update an existing Volume Assertion and corresponding Monitor. +The supported volume assertion types are `ROW_COUNT_TOTAL` and `ROW_COUNT_CHANGE`. Other (e.g. incrementing segment) types are not yet supported. +The supported operator types are `GREATER_THAN`, `GREATER_THAN_OR_EQUAL_TO`, `LESS_THAN`, `LESS_THAN_OR_EQUAL_TO`, and `BETWEEN` (requires minValue, maxValue). +The supported parameter types are `NUMBER`. 
+ +You can use the same endpoint with the assertion urn input to update an existing Volume Assertion and corresponding Monitor: -```json +```graphql mutation upsertDatasetVolumeAssertionMonitor { upsertDatasetVolumeAssertionMonitor( assertionUrn: "" diff --git a/docs/managed-datahub/release-notes/v_0_3_2.md b/docs/managed-datahub/release-notes/v_0_3_2.md new file mode 100644 index 0000000000000..f2e5ab46e6423 --- /dev/null +++ b/docs/managed-datahub/release-notes/v_0_3_2.md @@ -0,0 +1,34 @@ +# v0.3.2 +--- + +Release Availability Date +--- +16-May-2024 + +Recommended CLI/SDK +--- +- `v0.13.2.3` with release notes at https://github.com/acryldata/datahub/releases/tag/v0.13.2.3 + +If you are using an older CLI/SDK version, then please upgrade it. This applies to all CLI/SDK usages, whether you are using it through your terminal, GitHub Actions, Airflow, the Python SDK, the Java SDK, etc. This is a strong recommendation to upgrade as we keep on pushing fixes in the CLI and it helps us support you better. + +## Release Changelog +--- +- Since `v0.3.1` these changes from OSS DataHub https://github.com/datahub-project/datahub/compare/57de905c66b6992aefb2051708fa83898fa82cec...6ed21bd1bc70a3ceb7dddb43ea7db4ca56874547 have been pulled in. 
+- OpenAPI Entities v3 (Entity-registry generated endpoints) +- Business Attributes Support (disabled by default) +- Graph Retriever (validators, mutators, side-effects) +- New Lineage GraphQL Endpoints (optimizations for the UI) +- Metadata Tests Tracks Unique Hash +- SCIM Support +- V3 CLL Across DataJob Nodes Upgrade +- Subscribe to assertion errors: Get notified when assertion errors occur +- Group owners are labeled in slack incident notifications +- Field assertions now raise incidents when they go in error (if configured to do so) +- Assertion timeline viz handles missing data more gracefully +- Freshness assertions on Snowflake using the AUDIT_LOG will no longer consider no-op queries as valid 'updates' to the dataset +- [NEW] Schema Assertions: Allows users to configure a set of columns and expected types and verify that they actually exist in the table schema. Disabled by default. Please reach out to the Acryl team if you wish to have this enabled. +- Adding polish and fixing bugs throughout the new UI +- Ability to add and remove lineage manually in the new UI +- Ability to configure the default time filter on lineage +- Ability to collapse lineage in the lineage graph (new UI) +- Support rich text on metadata form descriptions diff --git a/docs/posts.md b/docs/posts.md index cdaf9d4325d0f..c44125bbd0017 100644 --- a/docs/posts.md +++ b/docs/posts.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Posts +# Posts DataHub allows users to make Posts that can be displayed on the app. Currently, Posts are only supported on the Home Page, but may be extended to other surfaces of the app in the future. 
Posts can be used to accomplish the following: diff --git a/docs/schema-history.md b/docs/schema-history.md index 120d041960186..e57b550dd98a7 100644 --- a/docs/schema-history.md +++ b/docs/schema-history.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Schema History +# Schema History diff --git a/docs/slack.md b/docs/slack.md index e1ef4593791e0..3e36b1f2ea3e3 100644 --- a/docs/slack.md +++ b/docs/slack.md @@ -2,7 +2,7 @@ The DataHub Slack is a thriving and rapidly growing community - we can't wait for you to join us! -_[Sign up here](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link) to join us on Slack and to subscribe to the DataHub Community newsletter. Already a member? [Log in here](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)._ +_[Sign up here](https://datahubproject.io/slack?utm_source=docs&utm_medium=page_link&utm_campaign=docs_page_link) to join us on Slack and to subscribe to the DataHub Community newsletter. Already a member? 
[Log in here](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)._ ## Slack Guidelines diff --git a/docs/sync-status.md b/docs/sync-status.md index a249a324e561c..2e9fbcdb5b7bd 100644 --- a/docs/sync-status.md +++ b/docs/sync-status.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Sync Status +# Sync Status diff --git a/docs/tags.md b/docs/tags.md index f626ae79b1a0f..880e57f8d0a4f 100644 --- a/docs/tags.md +++ b/docs/tags.md @@ -1,6 +1,6 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability'; -# About DataHub Tags +# Tags diff --git a/entity-registry/build.gradle b/entity-registry/build.gradle index 1f27faf4811dc..484a1f3271dbb 100644 --- a/entity-registry/build.gradle +++ b/entity-registry/build.gradle @@ -15,7 +15,10 @@ dependencies { implementation externalDependency.jacksonDataBind implementation externalDependency.jacksonDataFormatYaml implementation externalDependency.reflections + api externalDependency.jsonPatch + implementation externalDependency.jsonPathImpl + constraints { implementation(externalDependency.snakeYaml) { because("previous versions are vulnerable to CVE-2022-25857") @@ -28,6 +31,7 @@ dependencies { testImplementation project(':test-models') testImplementation project(path: ':test-models', configuration: 'testDataTemplate') + testImplementation project(':metadata-utils') testImplementation externalDependency.testng testImplementation externalDependency.mockito testImplementation externalDependency.mockitoInline diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java index 79f3a23c5c5e8..031625da0477c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/AspectsBatch.java @@ -7,6 
+7,7 @@ import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; +import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; import java.util.List; @@ -15,6 +16,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import org.apache.commons.lang3.StringUtils; /** * A batch of aspects in the context of either an MCP or MCL write path to a data store. The item is @@ -191,5 +193,23 @@ static Map> merge( Pair::getValue, Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)))); } - String toAbbreviatedString(int maxWidth); + default String toAbbreviatedString(int maxWidth) { + return toAbbreviatedString(getItems(), maxWidth); + } + + static String toAbbreviatedString(Collection items, int maxWidth) { + List itemsAbbreviated = new ArrayList(); + items.forEach( + item -> { + if (item instanceof ChangeMCP) { + itemsAbbreviated.add(((ChangeMCP) item).toAbbreviatedString()); + } else { + itemsAbbreviated.add(item.toString()); + } + }); + return "AspectsBatchImpl{" + + "items=" + + StringUtils.abbreviate(itemsAbbreviated.toString(), maxWidth) + + '}'; + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java index 19896e2b03544..18c7b477a9df8 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/ChangeMCP.java @@ -4,8 +4,10 @@ import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.aspect.SystemAspect; import java.lang.reflect.InvocationTargetException; +import java.util.Optional; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import org.apache.commons.lang3.StringUtils; /** * A proposal to write data to the primary datastore 
which includes system metadata and other @@ -47,4 +49,24 @@ default T getPreviousAspect(Class clazz) { } return null; } + + default String toAbbreviatedString() { + return "ChangeMCP{" + + "changeType=" + + getChangeType() + + ", urn=" + + getUrn() + + ", aspectName='" + + getAspectName() + + '\'' + + ", recordTemplate=" + + Optional.ofNullable(getRecordTemplate()) + .map(template -> StringUtils.abbreviate(template.toString(), 256)) + .orElse("") + + ", systemMetadata=" + + Optional.ofNullable(getSystemMetadata()) + .map(systemMetadata -> StringUtils.abbreviate(systemMetadata.toString(), 128)) + .orElse("") + + '}'; + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java index f04133e9e1ff8..293ef90a25f81 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/batch/PatchMCP.java @@ -1,8 +1,8 @@ package com.linkedin.metadata.aspect.batch; -import com.github.fge.jsonpatch.Patch; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.aspect.AspectRetriever; +import jakarta.json.JsonPatch; /** * A change proposal represented as a patch to an exiting stored object in the primary data store. 
@@ -17,5 +17,5 @@ public interface PatchMCP extends MCPItem { */ ChangeMCP applyPatch(RecordTemplate recordTemplate, AspectRetriever aspectRetriever); - Patch getPatch(); + JsonPatch getPatch(); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java index dbe700219946c..5efb1e8aebb06 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDelete.java @@ -13,11 +13,15 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +@Getter +@Setter +@Accessors(chain = true) public class StructuredPropertiesSoftDelete extends MutationHook { - public StructuredPropertiesSoftDelete(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + @Nonnull private AspectPluginConfig config; @Override protected Stream> readMutation( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/GenericJsonPatch.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/GenericJsonPatch.java index 484603b9c1f85..09392330c81a5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/GenericJsonPatch.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/GenericJsonPatch.java @@ -1,12 +1,15 @@ package com.linkedin.metadata.aspect.patch; import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.databind.JsonNode; -import com.github.fge.jsonpatch.JsonPatch; -import java.io.IOException; +import com.linkedin.util.Pair; +import jakarta.json.Json; +import jakarta.json.JsonArrayBuilder; +import jakarta.json.JsonPatch; import 
java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.AllArgsConstructor; @@ -21,7 +24,7 @@ public class GenericJsonPatch { @Nullable private Map> arrayPrimaryKeys; - @Nonnull private JsonNode patch; + @Nonnull private List patch; @Nonnull public Map> getArrayPrimaryKeys() { @@ -29,7 +32,27 @@ public Map> getArrayPrimaryKeys() { } @JsonIgnore - public JsonPatch getJsonPatch() throws IOException { - return JsonPatch.fromJson(patch); + public JsonPatch getJsonPatch() { + JsonArrayBuilder arrayBuilder = Json.createArrayBuilder(); + patch.forEach(op -> arrayBuilder.add(Json.createObjectBuilder(op.toMap()))); + return Json.createPatch(arrayBuilder.build()); + } + + @Data + @NoArgsConstructor + public static class PatchOp { + @Nonnull private String op; + @Nonnull private String path; + @Nullable private Object value; + + public Map toMap() { + if (value != null) { + return Stream.of(Pair.of("op", op), Pair.of("path", path), Pair.of("value", value)) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } else { + return Stream.of(Pair.of("op", op), Pair.of("path", path)) + .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + } + } } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java index 4613396109cc1..0818241df8155 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/AspectTemplateEngine.java @@ -15,10 +15,9 @@ import static com.linkedin.metadata.Constants.UPSTREAM_LINEAGE_ASPECT_NAME; import com.fasterxml.jackson.core.JsonProcessingException; -import 
com.github.fge.jsonpatch.JsonPatchException; -import com.github.fge.jsonpatch.Patch; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.models.AspectSpec; +import jakarta.json.JsonPatch; import java.util.HashMap; import java.util.Map; import java.util.Set; @@ -75,12 +74,11 @@ public RecordTemplate getDefaultTemplate(String aspectSpecName) { * @param aspectSpec aspectSpec of the template * @return a {@link RecordTemplate} with the patch applied * @throws JsonProcessingException if there is an issue with processing the record template's json - * @throws JsonPatchException if there is an issue with applying the json patch */ @Nonnull public RecordTemplate applyPatch( - RecordTemplate recordTemplate, Patch jsonPatch, AspectSpec aspectSpec) - throws JsonProcessingException, JsonPatchException { + RecordTemplate recordTemplate, JsonPatch jsonPatch, AspectSpec aspectSpec) + throws JsonProcessingException { Template template = getTemplate(aspectSpec); return template.applyPatch(recordTemplate, jsonPatch); } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/CompoundKeyTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/CompoundKeyTemplate.java index 78cf14c47a0bf..2b6c8f9409d26 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/CompoundKeyTemplate.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/CompoundKeyTemplate.java @@ -1,23 +1,29 @@ package com.linkedin.metadata.aspect.patch.template; +import static com.linkedin.metadata.aspect.patch.template.TemplateUtil.OBJECT_MAPPER; import static com.linkedin.metadata.aspect.patch.template.TemplateUtil.populateTopLevelKeys; import com.datahub.util.RecordUtils; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; -import com.github.fge.jsonpatch.JsonPatchException; -import com.github.fge.jsonpatch.Patch; 
import com.linkedin.data.template.RecordTemplate; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonPatch; +import java.io.StringReader; public abstract class CompoundKeyTemplate implements ArrayMergingTemplate { @Override - public T applyPatch(RecordTemplate recordTemplate, Patch jsonPatch) - throws JsonProcessingException, JsonPatchException { + public T applyPatch(RecordTemplate recordTemplate, JsonPatch jsonPatch) + throws JsonProcessingException { JsonNode transformed = populateTopLevelKeys(preprocessTemplate(recordTemplate), jsonPatch); - JsonNode patched = jsonPatch.apply(transformed); - JsonNode postProcessed = rebaseFields(patched); + JsonObject patched = + jsonPatch.apply( + Json.createReader(new StringReader(OBJECT_MAPPER.writeValueAsString(transformed))) + .readObject()); + JsonNode postProcessed = rebaseFields(OBJECT_MAPPER.readTree(patched.toString())); return RecordUtils.toRecordTemplate(getTemplateType(), postProcessed.toString()); } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/Template.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/Template.java index bd8cd544fb59b..06acb4cb31896 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/Template.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/Template.java @@ -6,9 +6,11 @@ import com.datahub.util.RecordUtils; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; -import com.github.fge.jsonpatch.JsonPatchException; -import com.github.fge.jsonpatch.Patch; import com.linkedin.data.template.RecordTemplate; +import jakarta.json.Json; +import jakarta.json.JsonObject; +import jakarta.json.JsonPatch; +import java.io.StringReader; import javax.annotation.Nonnull; public interface Template { @@ -45,18 +47,21 @@ default T getSubtype(RecordTemplate recordTemplate) throws 
ClassCastException { * @param jsonPatch patch to apply * @return patched value * @throws JsonProcessingException if there is an issue converting the input to JSON - * @throws JsonPatchException if there is an issue applying the patch */ - default T applyPatch(RecordTemplate recordTemplate, Patch jsonPatch) - throws JsonProcessingException, JsonPatchException { - + default T applyPatch(RecordTemplate recordTemplate, JsonPatch jsonPatch) + throws JsonProcessingException { TemplateUtil.validatePatch(jsonPatch); + JsonNode transformed = populateTopLevelKeys(preprocessTemplate(recordTemplate), jsonPatch); try { - JsonNode patched = jsonPatch.apply(transformed); - JsonNode postProcessed = rebaseFields(patched); + // Hack in a more efficient patcher. Even with the serialization overhead 140% faster + JsonObject patched = + jsonPatch.apply( + Json.createReader(new StringReader(OBJECT_MAPPER.writeValueAsString(transformed))) + .readObject()); + JsonNode postProcessed = rebaseFields(OBJECT_MAPPER.readTree(patched.toString())); return RecordUtils.toRecordTemplate(getTemplateType(), postProcessed.toString()); - } catch (JsonPatchException e) { + } catch (JsonProcessingException e) { throw new RuntimeException( String.format( "Error performing JSON PATCH on aspect %s. 
Patch: %s Target: %s", diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java index d998692f2c388..be3fc4c1fc983 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/TemplateUtil.java @@ -8,9 +8,10 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.fge.jsonpatch.Patch; import com.linkedin.metadata.aspect.patch.PatchOperationType; import com.linkedin.util.Pair; +import jakarta.json.JsonPatch; +import jakarta.json.JsonValue; import java.util.ArrayList; import java.util.List; @@ -30,34 +31,32 @@ private TemplateUtil() {} .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); } - public static List> getPaths(Patch jsonPatch) { - JsonNode patchNode = OBJECT_MAPPER.valueToTree(jsonPatch); + public static List> getPaths(JsonPatch jsonPatch) { List> paths = new ArrayList<>(); - patchNode - .elements() - .forEachRemaining( + jsonPatch.toJsonArray().stream() + .map(JsonValue::asJsonObject) + .forEach( node -> paths.add( Pair.of( - PatchOperationType.valueOf(node.get("op").asText().toUpperCase()), - node.get("path").asText()))); + PatchOperationType.valueOf(node.getString("op").toUpperCase()), + node.getString("path")))); return paths; } - public static void validatePatch(Patch jsonPatch) { + public static void validatePatch(JsonPatch jsonPatch) { // ensure supported patch operations - JsonNode patchNode = OBJECT_MAPPER.valueToTree(jsonPatch); - patchNode - .elements() - .forEachRemaining( - node -> { + jsonPatch.toJsonArray().stream() + .map(JsonValue::asJsonObject) + .forEach( + jsonObject -> { try { - 
PatchOperationType.valueOf(node.get("op").asText().toUpperCase()); + PatchOperationType.valueOf(jsonObject.getString("op").toUpperCase()); } catch (Exception e) { throw new RuntimeException( String.format( "Unsupported PATCH operation: `%s` Operation `%s`", - node.get("op").asText(), node), + jsonObject.getString("op"), jsonObject), e); } }); @@ -70,7 +69,7 @@ public static void validatePatch(Patch jsonPatch) { * @param transformedNode transformed node to have keys populated * @return transformed node that has top level keys populated */ - public static JsonNode populateTopLevelKeys(JsonNode transformedNode, Patch jsonPatch) { + public static JsonNode populateTopLevelKeys(JsonNode transformedNode, JsonPatch jsonPatch) { JsonNode transformedNodeClone = transformedNode.deepCopy(); List> paths = getPaths(jsonPatch); for (Pair operationPath : paths) { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/GenericPatchTemplate.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/GenericPatchTemplate.java index 3a3e3c99f25a3..3658ba3c54463 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/GenericPatchTemplate.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/patch/template/common/GenericPatchTemplate.java @@ -1,7 +1,6 @@ package com.linkedin.metadata.aspect.patch.template.common; import com.fasterxml.jackson.databind.JsonNode; -import com.github.fge.jsonpatch.JsonPatchException; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.aspect.patch.GenericJsonPatch; import com.linkedin.metadata.aspect.patch.template.CompoundKeyTemplate; @@ -53,7 +52,7 @@ public JsonNode rebaseFields(JsonNode patched) { return transformedNode; } - public T applyPatch(RecordTemplate recordTemplate) throws IOException, JsonPatchException { + public T applyPatch(RecordTemplate recordTemplate) throws IOException { return 
super.applyPatch(recordTemplate, genericJsonPatch.getJsonPatch()); } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java index 66b362542ff7f..c812aea0c55d7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginFactory.java @@ -16,6 +16,8 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -24,21 +26,13 @@ import javax.annotation.Nullable; import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.ArrayUtils; @Slf4j public class PluginFactory { - private static final String[] VALIDATOR_PACKAGES = { - "com.linkedin.metadata.aspect.plugins.validation", "com.linkedin.metadata.aspect.validation" - }; - private static final String[] HOOK_PACKAGES = { - "com.linkedin.metadata.aspect.plugins.hooks", "com.linkedin.metadata.aspect.hooks" - }; - public static PluginFactory withCustomClasspath( @Nullable PluginConfiguration pluginConfiguration, @Nonnull List classLoaders) { - return new PluginFactory(pluginConfiguration, classLoaders); + return new PluginFactory(pluginConfiguration, classLoaders).loadPlugins(); } public static PluginFactory withConfig(@Nullable PluginConfiguration pluginConfiguration) { @@ -49,44 +43,135 @@ public static PluginFactory empty() { return PluginFactory.withConfig(PluginConfiguration.EMPTY); } - public static PluginFactory merge(PluginFactory a, PluginFactory b) { - return PluginFactory.withCustomClasspath( - PluginConfiguration.merge(a.getPluginConfiguration(), b.getPluginConfiguration()), + public static PluginFactory merge( + PluginFactory a, + 
PluginFactory b, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) { + PluginConfiguration mergedPluginConfig = + PluginConfiguration.merge(a.pluginConfiguration, b.pluginConfiguration); + List mergedClassLoaders = Stream.concat(a.getClassLoaders().stream(), b.getClassLoaders().stream()) - .collect(Collectors.toList())); + .collect(Collectors.toList()); + + if (pluginFactoryProvider != null) { + return pluginFactoryProvider.apply(mergedPluginConfig, mergedClassLoaders); + } else { + return PluginFactory.withCustomClasspath(mergedPluginConfig, mergedClassLoaders); + } } @Getter private final PluginConfiguration pluginConfiguration; @Nonnull @Getter private final List classLoaders; - @Getter private final List aspectPayloadValidators; - @Getter private final List mutationHooks; - @Getter private final List mclSideEffects; - @Getter private final List mcpSideEffects; + @Getter private List aspectPayloadValidators; + @Getter private List mutationHooks; + @Getter private List mclSideEffects; + @Getter private List mcpSideEffects; - private final ClassGraph classGraph; + private static final Map> pluginCache = new ConcurrentHashMap<>(); public PluginFactory( @Nullable PluginConfiguration pluginConfiguration, @Nonnull List classLoaders) { - this.classGraph = - new ClassGraph() - .acceptPackages(ArrayUtils.addAll(HOOK_PACKAGES, VALIDATOR_PACKAGES)) - .enableRemoteJarScanning() - .enableExternalClasses() - .enableClassInfo() - .enableMethodInfo(); - this.classLoaders = classLoaders; - - if (!this.classLoaders.isEmpty()) { - classLoaders.forEach(this.classGraph::addClassLoader); - } - this.pluginConfiguration = pluginConfiguration == null ? 
PluginConfiguration.EMPTY : pluginConfiguration; + } + + public PluginFactory loadPlugins() { this.aspectPayloadValidators = buildAspectPayloadValidators(this.pluginConfiguration); this.mutationHooks = buildMutationHooks(this.pluginConfiguration); this.mclSideEffects = buildMCLSideEffects(this.pluginConfiguration); this.mcpSideEffects = buildMCPSideEffects(this.pluginConfiguration); + return this; + } + + /** + * Memory intensive operation because of the size of the jars. Limit packages, classes scanned, + * cache results + * + * @param configs plugin configurations + * @return auto-closeable scan result + */ + protected static List initPlugins( + @Nonnull List classLoaders, + @Nonnull Class baseClazz, + @Nonnull List packageNames, + @Nonnull List configs) { + + List classNames = + configs.stream().map(AspectPluginConfig::getClassName).collect(Collectors.toList()); + + if (classNames.isEmpty()) { + return Collections.emptyList(); + } else { + long key = + IntStream.concat( + classLoaders.stream().mapToInt(Object::hashCode), + IntStream.concat( + IntStream.of(baseClazz.getName().hashCode()), + configs.stream().mapToInt(AspectPluginConfig::hashCode))) + .sum(); + + return (List) + pluginCache.computeIfAbsent( + key, + k -> { + try { + ClassGraph classGraph = + new ClassGraph() + .acceptPackages(packageNames.stream().distinct().toArray(String[]::new)) + .acceptClasses(classNames.stream().distinct().toArray(String[]::new)) + .enableRemoteJarScanning() + .enableExternalClasses() + .enableClassInfo() + .enableMethodInfo(); + if (!classLoaders.isEmpty()) { + classLoaders.forEach(classGraph::addClassLoader); + } + + try (ScanResult scanResult = classGraph.scan()) { + Map classMap = + scanResult.getSubclasses(baseClazz).stream() + .collect(Collectors.toMap(ClassInfo::getName, Function.identity())); + + return configs.stream() + .map( + config -> { + try { + ClassInfo classInfo = classMap.get(config.getClassName()); + if (classInfo == null) { + throw new 
IllegalStateException( + String.format( + "The following class cannot be loaded: %s", + config.getClassName())); + } + MethodInfo constructorMethod = + classInfo.getConstructorInfo().get(0); + return ((T) + constructorMethod + .loadClassAndGetConstructor() + .newInstance()) + .setConfig(config); + } catch (Exception e) { + log.error( + "Error constructing entity registry plugin class: {}", + config.getClassName(), + e); + return (T) null; + } + }) + .filter(Objects::nonNull) + .filter(PluginSpec::enabled) + .collect(Collectors.toList()); + } + } catch (Exception e) { + throw new IllegalArgumentException( + String.format( + "Failed to load entity registry plugins: %s.", baseClazz.getName()), + e); + } + }); + } } /** @@ -187,15 +272,18 @@ private List buildAspectPayloadValidators( : applyDisable( build( AspectPayloadValidator.class, - pluginConfiguration.getAspectPayloadValidators(), - VALIDATOR_PACKAGES)); + pluginConfiguration.validatorPackages(), + pluginConfiguration.getAspectPayloadValidators())); } private List buildMutationHooks(@Nullable PluginConfiguration pluginConfiguration) { return pluginConfiguration == null ? Collections.emptyList() : applyDisable( - build(MutationHook.class, pluginConfiguration.getMutationHooks(), HOOK_PACKAGES)); + build( + MutationHook.class, + pluginConfiguration.mutationPackages(), + pluginConfiguration.getMutationHooks())); } private List buildMCLSideEffects( @@ -203,7 +291,10 @@ private List buildMCLSideEffects( return pluginConfiguration == null ? Collections.emptyList() : applyDisable( - build(MCLSideEffect.class, pluginConfiguration.getMclSideEffects(), HOOK_PACKAGES)); + build( + MCLSideEffect.class, + pluginConfiguration.mclSideEffectPackages(), + pluginConfiguration.getMclSideEffects())); } private List buildMCPSideEffects( @@ -211,44 +302,37 @@ private List buildMCPSideEffects( return pluginConfiguration == null ? 
Collections.emptyList() : applyDisable( - build(MCPSideEffect.class, pluginConfiguration.getMcpSideEffects(), HOOK_PACKAGES)); + build( + MCPSideEffect.class, + pluginConfiguration.mcpSideEffectPackages(), + pluginConfiguration.getMcpSideEffects())); } - private List build( - Class baseClazz, List configs, String... packageNames) { - try (ScanResult scanResult = classGraph.acceptPackages(packageNames).scan()) { - - Map classMap = - scanResult.getSubclasses(baseClazz).stream() - .collect(Collectors.toMap(ClassInfo::getName, Function.identity())); - - return configs.stream() - .flatMap( - config -> { - try { - ClassInfo classInfo = classMap.get(config.getClassName()); - if (classInfo == null) { - throw new IllegalStateException( - String.format( - "The following class cannot be loaded: %s", config.getClassName())); - } - MethodInfo constructorMethod = classInfo.getConstructorInfo().get(0); - return Stream.of( - (T) constructorMethod.loadClassAndGetConstructor().newInstance(config)); - } catch (Exception e) { - log.error( - "Error constructing entity registry plugin class: {}", - config.getClassName(), - e); - return Stream.empty(); - } - }) - .collect(Collectors.toList()); + /** + * Load plugins given the base class (i.e. a validator) and the name of the implementing class + * found in the configuration objects. + * + *

For performance reasons, scan the packages found in packageNames + * + *

Designed to avoid any Spring dependency, see alternative implementation for Spring + * + * @param baseClazz base class for the plugin + * @param configs configuration with implementing class information + * @param packageNames package names to scan + * @return list of plugin instances + * @param the plugin class + */ + protected List build( + Class baseClazz, List packageNames, List configs) { + List nonSpringConfigs = + configs.stream() + .filter( + config -> + config.getSpring() == null + || Boolean.FALSE.equals(config.getSpring().isEnabled())) + .collect(Collectors.toList()); - } catch (Exception e) { - throw new IllegalArgumentException( - String.format("Failed to load entity registry plugins: %s.", baseClazz.getName()), e); - } + return initPlugins(classLoaders, baseClazz, packageNames, nonSpringConfigs); } @Nonnull diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java index 564fbf32e809f..1adb1be81ecc1 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/PluginSpec.java @@ -15,10 +15,13 @@ public abstract class PluginSpec { protected static String ENTITY_WILDCARD = "*"; - private final AspectPluginConfig aspectPluginConfig; + @Nonnull + public abstract AspectPluginConfig getConfig(); - protected AspectPluginConfig getConfig() { - return this.aspectPluginConfig; + public abstract PluginSpec setConfig(@Nonnull AspectPluginConfig config); + + public boolean enabled() { + return true; } public boolean shouldApply( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java index 8d9a8d6fc6a69..e10bdd98cd18a 100644 --- 
a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/AspectPluginConfig.java @@ -16,21 +16,14 @@ @Builder public class AspectPluginConfig { @Nonnull private String className; + @Nullable private List packageScan; + private boolean enabled; @Nullable private List supportedOperations; @Nonnull private List supportedEntityAspectNames; - @Data - @NoArgsConstructor - @AllArgsConstructor - @Builder - public static class EntityAspectName { - public static final EntityAspectName ALL = new EntityAspectName("*", "*"); - - @Nonnull private String entityName; - @Nonnull private String aspectName; - } + @Nullable private SpringPluginConfig spring; @Nonnull public List getSupportedOperations() { @@ -47,6 +40,26 @@ public boolean isDisabledBy(AspectPluginConfig o) { return enabled && this.isEqualExcludingEnabled(o) && !o.enabled; } + @Data + @NoArgsConstructor + @AllArgsConstructor + @Builder + public static class EntityAspectName { + public static final EntityAspectName ALL = new EntityAspectName("*", "*"); + + @Nonnull private String entityName; + @Nonnull private String aspectName; + } + + @Data + @NoArgsConstructor + @AllArgsConstructor + @Builder + public static class SpringPluginConfig { + private boolean enabled; + @Nullable private String name; + } + private boolean isEqualExcludingEnabled(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; @@ -54,7 +67,9 @@ private boolean isEqualExcludingEnabled(Object o) { AspectPluginConfig that = (AspectPluginConfig) o; if (!className.equals(that.className)) return false; + if (!Objects.equals(packageScan, that.getPackageScan())) return false; if (!Objects.equals(supportedOperations, that.supportedOperations)) return false; + if (!Objects.equals(spring, that.spring)) return false; return supportedEntityAspectNames.equals(that.supportedEntityAspectNames); } } diff 
--git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java index a2caab7be5f80..e9494c49a9efb 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/config/PluginConfiguration.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.aspect.plugins.config; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; @@ -12,6 +13,13 @@ @AllArgsConstructor @NoArgsConstructor public class PluginConfiguration { + private static final String[] VALIDATOR_PACKAGES = { + "com.linkedin.metadata.aspect.plugins.validation", "com.linkedin.metadata.aspect.validation" + }; + private static final String[] HOOK_PACKAGES = { + "com.linkedin.metadata.aspect.plugins.hooks", "com.linkedin.metadata.aspect.hooks" + }; + private List aspectPayloadValidators = Collections.emptyList(); private List mutationHooks = Collections.emptyList(); private List mclSideEffects = Collections.emptyList(); @@ -31,4 +39,56 @@ public static PluginConfiguration merge(PluginConfiguration a, PluginConfigurati Stream.concat(a.getMcpSideEffects().stream(), b.getMcpSideEffects().stream()) .collect(Collectors.toList())); } + + public Stream streamAll() { + return Stream.concat( + Stream.concat( + Stream.concat(aspectPayloadValidators.stream(), mutationHooks.stream()), + mclSideEffects.stream()), + mcpSideEffects.stream()); + } + + public List validatorPackages() { + return aspectPayloadValidators.stream() + .flatMap( + cfg -> + cfg.getPackageScan() != null + ? 
cfg.getPackageScan().stream() + : Arrays.stream(VALIDATOR_PACKAGES)) + .distinct() + .collect(Collectors.toList()); + } + + public List mcpSideEffectPackages() { + return mcpSideEffects.stream() + .flatMap( + cfg -> + cfg.getPackageScan() != null + ? cfg.getPackageScan().stream() + : Arrays.stream(HOOK_PACKAGES)) + .distinct() + .collect(Collectors.toList()); + } + + public List mclSideEffectPackages() { + return mclSideEffects.stream() + .flatMap( + cfg -> + cfg.getPackageScan() != null + ? cfg.getPackageScan().stream() + : Arrays.stream(HOOK_PACKAGES)) + .distinct() + .collect(Collectors.toList()); + } + + public List mutationPackages() { + return mutationHooks.stream() + .flatMap( + cfg -> + cfg.getPackageScan() != null + ? cfg.getPackageScan().stream() + : Arrays.stream(HOOK_PACKAGES)) + .distinct() + .collect(Collectors.toList()); + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java index 902e928c13771..57016404648d5 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffect.java @@ -3,7 +3,6 @@ import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.aspect.plugins.PluginSpec; -import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import java.util.Collection; import java.util.function.BiFunction; import java.util.stream.Collectors; @@ -14,10 +13,6 @@ public abstract class MCLSideEffect extends PluginSpec implements BiFunction, RetrieverContext, Stream> { - public MCLSideEffect(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } - /** * Given a list of MCLs, output additional MCLs * diff --git 
a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java index e8c703fa20717..845f967c0a528 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffect.java @@ -3,7 +3,6 @@ import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.PluginSpec; -import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import java.util.Collection; import java.util.function.BiFunction; import java.util.stream.Collectors; @@ -14,10 +13,6 @@ public abstract class MCPSideEffect extends PluginSpec implements BiFunction, RetrieverContext, Stream> { - public MCPSideEffect(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } - /** * Given the list of MCP upserts, output additional upserts * diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java index 7ec6c7adfac46..c067954912a03 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/MutationHook.java @@ -4,7 +4,6 @@ import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.PluginSpec; -import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.util.Pair; import java.util.Collection; import java.util.stream.Collectors; @@ -14,10 +13,6 @@ /** Applies changes to the RecordTemplate prior to write */ public abstract class MutationHook extends PluginSpec { - public 
MutationHook(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } - /** * Mutating hook, original objects are potentially modified. * diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java index fd03ca86d74a8..b39c38c2768a7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/plugins/validation/AspectPayloadValidator.java @@ -4,7 +4,6 @@ import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.plugins.PluginSpec; -import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import java.util.Collection; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -12,10 +11,6 @@ public abstract class AspectPayloadValidator extends PluginSpec { - public AspectPayloadValidator(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } - /** * Validate a proposal for the given change type for an aspect within the context of the given * entity's urn. 
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/CreateIfNotExistsValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/CreateIfNotExistsValidator.java index 47814bef26e9a..2ad885dc9fdd2 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/CreateIfNotExistsValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/CreateIfNotExistsValidator.java @@ -16,13 +16,17 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; /** Common implementation of checking for create if not exists semantics. */ +@Setter +@Getter +@Accessors(chain = true) public class CreateIfNotExistsValidator extends AspectPayloadValidator { - public CreateIfNotExistsValidator(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + @Nonnull private AspectPluginConfig config; @Override protected Stream validatePreCommitAspects( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java index 436c863ad048c..a4efc38d16082 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/PropertyDefinitionValidator.java @@ -31,12 +31,15 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; +@Getter +@Setter +@Accessors(chain = true) public class PropertyDefinitionValidator extends AspectPayloadValidator { - - public PropertyDefinitionValidator(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); 
- } + private AspectPluginConfig config; /** * Prevent deletion of the definition or key aspect (only soft delete) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java index 31c0a1a9093f7..fcae6ca8cb71a 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/validation/StructuredPropertiesValidator.java @@ -27,6 +27,7 @@ import com.linkedin.structured.StructuredProperties; import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.structured.StructuredPropertyValueAssignment; +import com.linkedin.util.Pair; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Arrays; @@ -38,13 +39,21 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import lombok.extern.slf4j.Slf4j; /** A Validator for StructuredProperties Aspect that is attached to entities like Datasets, etc. 
*/ +@Setter +@Getter @Slf4j +@Accessors(chain = true) public class StructuredPropertiesValidator extends AspectPayloadValidator { private static final Set CHANGE_TYPES = ImmutableSet.of(ChangeType.CREATE, ChangeType.CREATE_ENTITY, ChangeType.UPSERT); @@ -57,10 +66,6 @@ public class StructuredPropertiesValidator extends AspectPayloadValidator { LogicalValueType.DATE, LogicalValueType.URN)); - public StructuredPropertiesValidator(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } - public static LogicalValueType getLogicalValueType(Urn valueType) { String valueTypeId = getValueTypeId(valueType); if (valueTypeId.equals("string")) { @@ -78,6 +83,8 @@ public static LogicalValueType getLogicalValueType(Urn valueType) { return LogicalValueType.UNKNOWN; } + @Nonnull private AspectPluginConfig config; + @Override protected Stream validateProposedAspects( @Nonnull Collection mcpItems, @@ -92,20 +99,22 @@ protected Stream validateProposedAspects( @Override protected Stream validatePreCommitAspects( @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { - return Stream.empty(); + return validateImmutable( + changeMCPs.stream() + .filter( + i -> + ChangeType.DELETE.equals(i.getChangeType()) + || CHANGE_TYPES.contains(i.getChangeType())) + .collect(Collectors.toList()), + retrieverContext.getAspectRetriever()); } public static Stream validateProposedUpserts( @Nonnull Collection mcpItems, @Nonnull AspectRetriever aspectRetriever) { ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); - - // Validate propertyUrns - Set validPropertyUrns = validateStructuredPropertyUrns(mcpItems, exceptions); - - // Fetch property aspects for further validation Map> allStructuredPropertiesAspects = - fetchPropertyAspects(validPropertyUrns, aspectRetriever); + fetchPropertyAspects(mcpItems, aspectRetriever, exceptions, false); // Validate assignments for (BatchItem i : exceptions.successful(mcpItems)) { @@ -120,15 
+129,13 @@ public static Stream validateProposedUpserts( softDeleteCheck(i, propertyAspects, "Cannot apply a soft deleted Structured Property value") .ifPresent(exceptions::addException); - Aspect structuredPropertyDefinitionAspect = - propertyAspects.get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME); - if (structuredPropertyDefinitionAspect == null) { + StructuredPropertyDefinition structuredPropertyDefinition = + lookupPropertyDefinition(propertyUrn, allStructuredPropertiesAspects); + if (structuredPropertyDefinition == null) { exceptions.addException(i, "Unexpected null value found."); } - StructuredPropertyDefinition structuredPropertyDefinition = - new StructuredPropertyDefinition(structuredPropertyDefinitionAspect.data()); - log.warn( + log.debug( "Retrieved property definition for {}. {}", propertyUrn, structuredPropertyDefinition); if (structuredPropertyDefinition != null) { PrimitivePropertyValueArray values = structuredPropertyValueAssignment.getValues(); @@ -158,8 +165,73 @@ public static Stream validateProposedUpserts( return exceptions.streamAllExceptions(); } + public static Stream validateImmutable( + @Nonnull Collection changeMCPs, @Nonnull AspectRetriever aspectRetriever) { + + ValidationExceptionCollection exceptions = ValidationExceptionCollection.newCollection(); + final Map> allStructuredPropertiesAspects = + fetchPropertyAspects(changeMCPs, aspectRetriever, exceptions, true); + + Set immutablePropertyUrns = + allStructuredPropertiesAspects.keySet().stream() + .map( + stringAspectMap -> + Pair.of( + stringAspectMap, + lookupPropertyDefinition(stringAspectMap, allStructuredPropertiesAspects))) + .filter(defPair -> defPair.getSecond() != null && defPair.getSecond().isImmutable()) + .map(Pair::getFirst) + .collect(Collectors.toSet()); + + // Validate immutable assignments + for (ChangeMCP i : exceptions.successful(changeMCPs)) { + + // only apply immutable validation if previous properties exist + if (i.getPreviousRecordTemplate() != null) { + Map 
newImmutablePropertyMap = + i.getAspect(StructuredProperties.class).getProperties().stream() + .filter(assign -> immutablePropertyUrns.contains(assign.getPropertyUrn())) + .collect( + Collectors.toMap( + StructuredPropertyValueAssignment::getPropertyUrn, Function.identity())); + Map oldImmutablePropertyMap = + i.getPreviousAspect(StructuredProperties.class).getProperties().stream() + .filter(assign -> immutablePropertyUrns.contains(assign.getPropertyUrn())) + .collect( + Collectors.toMap( + StructuredPropertyValueAssignment::getPropertyUrn, Function.identity())); + + // upsert/mutation path + newImmutablePropertyMap + .entrySet() + .forEach( + entry -> { + Urn propertyUrn = entry.getKey(); + StructuredPropertyValueAssignment assignment = entry.getValue(); + + if (oldImmutablePropertyMap.containsKey(propertyUrn) + && !oldImmutablePropertyMap.get(propertyUrn).equals(assignment)) { + exceptions.addException( + i, String.format("Cannot mutate an immutable property: %s", propertyUrn)); + } + }); + + // delete path + oldImmutablePropertyMap.entrySet().stream() + .filter(entry -> !newImmutablePropertyMap.containsKey(entry.getKey())) + .forEach( + entry -> + exceptions.addException( + i, + String.format("Cannot delete an immutable property %s", entry.getKey()))); + } + } + + return exceptions.streamAllExceptions(); + } + private static Set validateStructuredPropertyUrns( - Collection mcpItems, ValidationExceptionCollection exceptions) { + Collection mcpItems, ValidationExceptionCollection exceptions) { Set validPropertyUrns = new HashSet<>(); for (BatchItem i : exceptions.successful(mcpItems)) { @@ -202,6 +274,17 @@ private static Set validateStructuredPropertyUrns( return validPropertyUrns; } + private static Set previousStructuredPropertyUrns(Collection mcpItems) { + return mcpItems.stream() + .filter(i -> i instanceof ChangeMCP) + .map(i -> ((ChangeMCP) i)) + .filter(i -> i.getPreviousRecordTemplate() != null) + .flatMap(i -> 
i.getPreviousAspect(StructuredProperties.class).getProperties().stream()) + .map(StructuredPropertyValueAssignment::getPropertyUrn) + .filter(propertyUrn -> propertyUrn.getEntityType().equals("structuredProperty")) + .collect(Collectors.toSet()); + } + private static Optional validateAllowedValues( BatchItem item, Urn propertyUrn, @@ -338,14 +421,40 @@ private static String getValueTypeId(@Nonnull final Urn valueType) { } private static Map> fetchPropertyAspects( - Set structuredPropertyUrns, AspectRetriever aspectRetriever) { - if (structuredPropertyUrns.isEmpty()) { + @Nonnull Collection mcpItems, + AspectRetriever aspectRetriever, + @Nonnull ValidationExceptionCollection exceptions, + boolean includePrevious) { + + // Validate propertyUrns + Set validPropertyUrns = + Stream.concat( + validateStructuredPropertyUrns(mcpItems, exceptions).stream(), + includePrevious + ? previousStructuredPropertyUrns(mcpItems).stream() + : Stream.empty()) + .collect(Collectors.toSet()); + + if (validPropertyUrns.isEmpty()) { return Collections.emptyMap(); } else { return aspectRetriever.getLatestAspectObjects( - structuredPropertyUrns, + validPropertyUrns, ImmutableSet.of( Constants.STATUS_ASPECT_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)); } } + + @Nullable + private static StructuredPropertyDefinition lookupPropertyDefinition( + @Nonnull Urn propertyUrn, + @Nonnull Map> allStructuredPropertiesAspects) { + Map propertyAspects = + allStructuredPropertiesAspects.getOrDefault(propertyUrn, Collections.emptyMap()); + Aspect structuredPropertyDefinitionAspect = + propertyAspects.get(STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME); + return structuredPropertyDefinitionAspect == null + ? 
null + : new StructuredPropertyDefinition(structuredPropertyDefinitionAspect.data()); + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java index 9aed29ab8595e..4238c333615ec 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/ConfigEntityRegistry.java @@ -9,6 +9,7 @@ import com.linkedin.data.schema.DataSchema; import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.DataSchemaFactory; import com.linkedin.metadata.models.DefaultEntitySpec; @@ -33,9 +34,11 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.function.BiFunction; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Getter; import lombok.extern.slf4j.Slf4j; @@ -48,6 +51,10 @@ public class ConfigEntityRegistry implements EntityRegistry { private final DataSchemaFactory dataSchemaFactory; @Getter private final PluginFactory pluginFactory; + + @Nullable + private BiFunction, PluginFactory> pluginFactoryProvider; + private final Map entityNameToSpec; private final Map eventNameToSpec; private final List entitySpecs; @@ -66,19 +73,27 @@ public class ConfigEntityRegistry implements EntityRegistry { .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); } - public ConfigEntityRegistry(Pair configFileClassPathPair) throws IOException { + public ConfigEntityRegistry( + Pair configFileClassPathPair, + @Nullable + BiFunction, 
PluginFactory> pluginFactoryProvider) + throws IOException { this( DataSchemaFactory.withCustomClasspath(configFileClassPathPair.getSecond()), DataSchemaFactory.getClassLoader(configFileClassPathPair.getSecond()) .map(Stream::of) .orElse(Stream.empty()) .collect(Collectors.toList()), - configFileClassPathPair.getFirst()); + configFileClassPathPair.getFirst(), + pluginFactoryProvider); } - public ConfigEntityRegistry(String entityRegistryRoot) + public ConfigEntityRegistry( + String entityRegistryRoot, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws EntityRegistryException, IOException { - this(getFileAndClassPath(entityRegistryRoot)); + this(getFileAndClassPath(entityRegistryRoot), pluginFactoryProvider); } private static Pair getFileAndClassPath(String entityRegistryRoot) @@ -117,24 +132,57 @@ private static Pair getFileAndClassPath(String entityRegistryRoot) } public ConfigEntityRegistry(InputStream configFileInputStream) { - this(DataSchemaFactory.getInstance(), Collections.emptyList(), configFileInputStream); + this(configFileInputStream, null); } public ConfigEntityRegistry( - DataSchemaFactory dataSchemaFactory, List classLoaders, Path configFilePath) + InputStream configFileInputStream, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) { + this( + DataSchemaFactory.getInstance(), + Collections.emptyList(), + configFileInputStream, + pluginFactoryProvider); + } + + public ConfigEntityRegistry( + DataSchemaFactory dataSchemaFactory, + List classLoaders, + Path configFilePath, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws FileNotFoundException { - this(dataSchemaFactory, classLoaders, new FileInputStream(configFilePath.toString())); + this( + dataSchemaFactory, + classLoaders, + new FileInputStream(configFilePath.toString()), + pluginFactoryProvider); } public ConfigEntityRegistry( DataSchemaFactory dataSchemaFactory, List classLoaders, InputStream configFileStream) { + this(dataSchemaFactory, 
classLoaders, configFileStream, null); + } + + public ConfigEntityRegistry( + DataSchemaFactory dataSchemaFactory, + List classLoaders, + InputStream configFileStream, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) { this.dataSchemaFactory = dataSchemaFactory; Entities entities; try { entities = OBJECT_MAPPER.readValue(configFileStream, Entities.class); - this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); + if (pluginFactoryProvider != null) { + this.pluginFactory = pluginFactoryProvider.apply(entities.getPlugins(), classLoaders); + } else { + this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); + } + this.pluginFactoryProvider = pluginFactoryProvider; } catch (IOException e) { throw new IllegalArgumentException( String.format( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java index 83f5ab08e9f19..405c848f53660 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/EntityRegistry.java @@ -2,6 +2,7 @@ import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffect; import com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffect; import com.linkedin.metadata.aspect.plugins.hooks.MutationHook; @@ -12,6 +13,7 @@ import com.linkedin.metadata.models.EventSpec; import java.util.List; import java.util.Map; +import java.util.function.BiFunction; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -135,4 +137,10 @@ default List getAllMCLSideEffects() { default PluginFactory getPluginFactory() { return 
PluginFactory.empty(); } + + @Nullable + default BiFunction, PluginFactory> + getPluginFactoryProvider() { + return null; + } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java index 6a733cc23f395..ac8e302ac5077 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/MergedEntityRegistry.java @@ -5,6 +5,7 @@ import com.linkedin.data.schema.compatibility.CompatibilityResult; import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.ConfigEntitySpec; import com.linkedin.metadata.models.DefaultEntitySpec; @@ -14,8 +15,10 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.BiFunction; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; @@ -28,7 +31,11 @@ public class MergedEntityRegistry implements EntityRegistry { private final Map eventNameToSpec; private final AspectTemplateEngine _aspectTemplateEngine; private final Map _aspectNameToSpec; - @Nonnull private PluginFactory pluginFactory; + + @Getter @Nonnull private PluginFactory pluginFactory; + + @Getter @Nullable + private BiFunction, PluginFactory> pluginFactoryProvider; public MergedEntityRegistry(EntityRegistry baseEntityRegistry) { // baseEntityRegistry.get*Specs() can return immutable Collections.emptyMap() which fails @@ -51,6 +58,7 @@ public MergedEntityRegistry(EntityRegistry baseEntityRegistry) { } else { this.pluginFactory = 
PluginFactory.empty(); } + this.pluginFactoryProvider = baseEntityRegistry.getPluginFactoryProvider(); } private void validateEntitySpec(EntitySpec entitySpec, final ValidationResult validationResult) { @@ -100,7 +108,8 @@ public MergedEntityRegistry apply(EntityRegistry patchEntityRegistry) // Merge Plugins this.pluginFactory = - PluginFactory.merge(this.pluginFactory, patchEntityRegistry.getPluginFactory()); + PluginFactory.merge( + this.pluginFactory, patchEntityRegistry.getPluginFactory(), this.pluginFactoryProvider); return this; } @@ -220,12 +229,6 @@ public AspectTemplateEngine getAspectTemplateEngine() { return _aspectTemplateEngine; } - @Nonnull - @Override - public PluginFactory getPluginFactory() { - return this.pluginFactory; - } - @Setter @Getter private static class ValidationResult { diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java index b4fc4193e7263..7de040b03de72 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PatchEntityRegistry.java @@ -9,6 +9,7 @@ import com.linkedin.data.schema.DataSchema; import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.DataSchemaFactory; import com.linkedin.metadata.models.EntitySpec; @@ -31,9 +32,11 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.function.BiFunction; import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.Getter; import lombok.extern.slf4j.Slf4j; import 
org.apache.maven.artifact.versioning.ComparableVersion; @@ -48,6 +51,10 @@ public class PatchEntityRegistry implements EntityRegistry { private final DataSchemaFactory dataSchemaFactory; @Getter private final PluginFactory pluginFactory; + + @Getter @Nullable + private BiFunction, PluginFactory> pluginFactoryProvider; + private final Map entityNameToSpec; private final Map eventNameToSpec; private final Map _aspectNameToSpec; @@ -88,7 +95,9 @@ public String toString() { public PatchEntityRegistry( Pair configFileClassPathPair, String registryName, - ComparableVersion registryVersion) + ComparableVersion registryVersion, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws IOException, EntityRegistryException { this( DataSchemaFactory.withCustomClasspath(configFileClassPathPair.getSecond()), @@ -98,13 +107,22 @@ public PatchEntityRegistry( .collect(Collectors.toList()), configFileClassPathPair.getFirst(), registryName, - registryVersion); + registryVersion, + pluginFactoryProvider); } public PatchEntityRegistry( - String entityRegistryRoot, String registryName, ComparableVersion registryVersion) + String entityRegistryRoot, + String registryName, + ComparableVersion registryVersion, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws EntityRegistryException, IOException { - this(getFileAndClassPath(entityRegistryRoot), registryName, registryVersion); + this( + getFileAndClassPath(entityRegistryRoot), + registryName, + registryVersion, + pluginFactoryProvider); } private static Pair getFileAndClassPath(String entityRegistryRoot) @@ -147,14 +165,17 @@ public PatchEntityRegistry( List classLoaders, Path configFilePath, String registryName, - ComparableVersion registryVersion) + ComparableVersion registryVersion, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws FileNotFoundException, EntityRegistryException { this( dataSchemaFactory, classLoaders, new FileInputStream(configFilePath.toString()), registryName, - 
registryVersion); + registryVersion, + pluginFactoryProvider); } private PatchEntityRegistry( @@ -162,7 +183,9 @@ private PatchEntityRegistry( List classLoaders, InputStream configFileStream, String registryName, - ComparableVersion registryVersion) + ComparableVersion registryVersion, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) throws EntityRegistryException { this.dataSchemaFactory = dataSchemaFactory; this.registryName = registryName; @@ -171,7 +194,12 @@ private PatchEntityRegistry( Entities entities; try { entities = OBJECT_MAPPER.readValue(configFileStream, Entities.class); - this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); + if (pluginFactoryProvider != null) { + this.pluginFactory = pluginFactoryProvider.apply(entities.getPlugins(), classLoaders); + } else { + this.pluginFactory = PluginFactory.withCustomClasspath(entities.getPlugins(), classLoaders); + } + this.pluginFactoryProvider = pluginFactoryProvider; } catch (IOException e) { log.error("Unable to read Patch configuration.", e); throw new IllegalArgumentException( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java index 09b33ad0f596e..4f2e5a106ae79 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoader.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.models.registry; +import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.registry.config.EntityRegistryLoadResult; import com.linkedin.metadata.models.registry.config.LoadStatus; import com.linkedin.util.Pair; @@ -19,7 +21,9 @@ import 
java.util.concurrent.locks.Condition; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; +import java.util.function.BiFunction; import java.util.stream.Collectors; +import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; import org.apache.maven.artifact.versioning.ComparableVersion; @@ -33,13 +37,22 @@ public class PluginEntityRegistryLoader { private final Map>> patchRegistries; private MergedEntityRegistry mergedEntityRegistry; + + @Nullable + private final BiFunction, PluginFactory> + pluginFactoryProvider; + private boolean started = false; private final Lock lock = new ReentrantLock(); private final Condition initialized = lock.newCondition(); private boolean booted = false; private final ScheduledExecutorService executorService = Executors.newScheduledThreadPool(1); - public PluginEntityRegistryLoader(String pluginDirectory, int loadDelaySeconds) { + public PluginEntityRegistryLoader( + String pluginDirectory, + int loadDelaySeconds, + @Nullable + BiFunction, PluginFactory> pluginFactoryProvider) { File directory = new File(pluginDirectory); if (!directory.exists() || !directory.isDirectory()) { log.warn( @@ -52,6 +65,7 @@ public PluginEntityRegistryLoader(String pluginDirectory, int loadDelaySeconds) this.pluginDirectory = pluginDirectory; this.patchRegistries = new HashMap<>(); this.loadDelaySeconds = loadDelaySeconds; + this.pluginFactoryProvider = pluginFactoryProvider; } public Map>> @@ -180,7 +194,9 @@ private void loadOneRegistry( EntityRegistryLoadResult.builder().registryLocation(patchDirectory); EntityRegistry entityRegistry = null; try { - entityRegistry = new PatchEntityRegistry(patchDirectory, registryName, registryVersion); + entityRegistry = + new PatchEntityRegistry( + patchDirectory, registryName, registryVersion, pluginFactoryProvider); parentRegistry.apply(entityRegistry); loadResultBuilder.loadResult(LoadStatus.SUCCESS); diff --git 
a/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java index 363a9d01c95bc..e1e84f5728540 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/hooks/StructuredPropertiesSoftDeleteTest.java @@ -52,17 +52,18 @@ public void testSoftDeleteFilter() throws URISyntaxException, CloneNotSupportedE .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(0.0))); StructuredPropertiesSoftDelete testHook = - new StructuredPropertiesSoftDelete( - AspectPluginConfig.builder() - .enabled(true) - .className(StructuredPropertiesSoftDelete.class.getName()) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName(DATASET_ENTITY_NAME) - .aspectName(Constants.STRUCTURED_PROPERTIES_ASPECT_NAME) - .build())) - .build()); + new StructuredPropertiesSoftDelete() + .setConfig( + AspectPluginConfig.builder() + .enabled(true) + .className(StructuredPropertiesSoftDelete.class.getName()) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(DATASET_ENTITY_NAME) + .aspectName(Constants.STRUCTURED_PROPERTIES_ASPECT_NAME) + .build())) + .build()); StructuredProperties expectedAllValues = new StructuredProperties(); expectedAllValues.setProperties( diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/ChartInfoTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/ChartInfoTemplateTest.java index b2911100519fc..8b138e0f59ee9 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/ChartInfoTemplateTest.java +++ 
b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/ChartInfoTemplateTest.java @@ -1,17 +1,11 @@ package com.linkedin.metadata.aspect.patch.template; -import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; - -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.fge.jackson.jsonpointer.JsonPointer; -import com.github.fge.jsonpatch.AddOperation; -import com.github.fge.jsonpatch.JsonPatch; -import com.github.fge.jsonpatch.JsonPatchOperation; import com.linkedin.chart.ChartInfo; import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.aspect.patch.template.chart.ChartInfoTemplate; -import java.util.ArrayList; -import java.util.List; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonPatchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -21,18 +15,16 @@ public class ChartInfoTemplateTest { public void testChartInfoTemplate() throws Exception { ChartInfoTemplate chartInfoTemplate = new ChartInfoTemplate(); ChartInfo dashboardInfo = chartInfoTemplate.getDefault(); - List patchOperations = new ArrayList<>(); - ObjectNode edgeNode = instance.objectNode(); - edgeNode.put( + JsonPatchBuilder patchOperations = Json.createPatchBuilder(); + + JsonObjectBuilder edgeNode = Json.createObjectBuilder(); + edgeNode.add( "destinationUrn", "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"); - JsonPatchOperation operation = - new AddOperation( - new JsonPointer( - "/inputEdges/urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"), - edgeNode); - patchOperations.add(operation); - JsonPatch patch = new JsonPatch(patchOperations); - ChartInfo result = chartInfoTemplate.applyPatch(dashboardInfo, patch); + + patchOperations.add( + "/inputEdges/urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + edgeNode.build()); + ChartInfo result = chartInfoTemplate.applyPatch(dashboardInfo, patchOperations.build()); 
Assert.assertEquals( UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"), diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DashboardInfoTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DashboardInfoTemplateTest.java index be15d6976aee6..1446635c8de94 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DashboardInfoTemplateTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/DashboardInfoTemplateTest.java @@ -1,17 +1,10 @@ package com.linkedin.metadata.aspect.patch.template; -import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; - -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.fge.jackson.jsonpointer.JsonPointer; -import com.github.fge.jsonpatch.AddOperation; -import com.github.fge.jsonpatch.JsonPatch; -import com.github.fge.jsonpatch.JsonPatchOperation; import com.linkedin.common.urn.UrnUtils; import com.linkedin.dashboard.DashboardInfo; import com.linkedin.metadata.aspect.patch.template.dashboard.DashboardInfoTemplate; -import java.util.ArrayList; -import java.util.List; +import jakarta.json.Json; +import jakarta.json.JsonPatchBuilder; import org.testng.Assert; import org.testng.annotations.Test; @@ -21,18 +14,18 @@ public class DashboardInfoTemplateTest { public void testDashboardInfoTemplate() throws Exception { DashboardInfoTemplate dashboardInfoTemplate = new DashboardInfoTemplate(); DashboardInfo dashboardInfo = dashboardInfoTemplate.getDefault(); - List patchOperations = new ArrayList<>(); - ObjectNode edgeNode = instance.objectNode(); - edgeNode.put( - "destinationUrn", "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"); - JsonPatchOperation operation = - new AddOperation( - new JsonPointer( - "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"), - edgeNode); - 
patchOperations.add(operation); - JsonPatch patch = new JsonPatch(patchOperations); - DashboardInfo result = dashboardInfoTemplate.applyPatch(dashboardInfo, patch); + JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder(); + jsonPatchBuilder.add( + "/datasetEdges/urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)", + Json.createObjectBuilder() + .add( + "destinationUrn", + Json.createValue( + "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)")) + .build()); + + DashboardInfo result = + dashboardInfoTemplate.applyPatch(dashboardInfo, jsonPatchBuilder.build()); Assert.assertEquals( UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"), diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java index 4bad6a8e3d659..8c7bfc98b2673 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/patch/template/UpstreamLineageTemplateTest.java @@ -1,47 +1,51 @@ package com.linkedin.metadata.aspect.patch.template; -import static com.fasterxml.jackson.databind.node.JsonNodeFactory.*; - -import com.fasterxml.jackson.databind.node.NumericNode; -import com.fasterxml.jackson.databind.node.ObjectNode; -import com.github.fge.jackson.jsonpointer.JsonPointer; -import com.github.fge.jsonpatch.AddOperation; -import com.github.fge.jsonpatch.JsonPatch; -import com.github.fge.jsonpatch.JsonPatchOperation; -import com.github.fge.jsonpatch.RemoveOperation; +import static com.linkedin.metadata.utils.GenericRecordUtils.JSON; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; 
import com.linkedin.common.urn.UrnUtils; +import com.linkedin.data.ByteString; import com.linkedin.data.DataMap; import com.linkedin.dataset.FineGrainedLineage; import com.linkedin.dataset.FineGrainedLineageDownstreamType; import com.linkedin.dataset.FineGrainedLineageUpstreamType; import com.linkedin.dataset.UpstreamLineage; import com.linkedin.metadata.aspect.patch.template.dataset.UpstreamLineageTemplate; -import java.util.ArrayList; -import java.util.List; -import org.testng.Assert; +import com.linkedin.metadata.utils.GenericRecordUtils; +import jakarta.json.Json; +import jakarta.json.JsonObjectBuilder; +import jakarta.json.JsonPatch; +import jakarta.json.JsonPatchBuilder; +import jakarta.json.JsonValue; +import java.io.StringReader; +import java.nio.charset.StandardCharsets; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.testng.annotations.Test; public class UpstreamLineageTemplateTest { + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + @Test public void testPatchUpstream() throws Exception { UpstreamLineageTemplate upstreamLineageTemplate = new UpstreamLineageTemplate(); UpstreamLineage upstreamLineage = upstreamLineageTemplate.getDefault(); - List patchOperations = new ArrayList<>(); - ObjectNode fineGrainedLineageNode = instance.objectNode(); - NumericNode upstreamConfidenceScore = instance.numberNode(1.0f); - fineGrainedLineageNode.set("confidenceScore", upstreamConfidenceScore); - JsonPatchOperation operation = - new AddOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)//urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)"), - fineGrainedLineageNode); - patchOperations.add(operation); - JsonPatch jsonPatch = new JsonPatch(patchOperations); + JsonPatchBuilder jsonPatchBuilder = Json.createPatchBuilder(); + + JsonObjectBuilder fineGrainedLineageNode = 
Json.createObjectBuilder(); + JsonValue upstreamConfidenceScore = Json.createValue(1.0f); + fineGrainedLineageNode.add("confidenceScore", upstreamConfidenceScore); + + jsonPatchBuilder.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)//urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)", + fineGrainedLineageNode.build()); // Initial population test - UpstreamLineage result = upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatch); + UpstreamLineage result = + upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatchBuilder.build()); // Hack because Jackson parses values to doubles instead of floats DataMap dataMap = new DataMap(); dataMap.put("confidenceScore", 1.0); @@ -61,36 +65,35 @@ public void testPatchUpstream() throws Exception { fineGrainedLineage.setTransformOperation("CREATE"); fineGrainedLineage.setUpstreamType(FineGrainedLineageUpstreamType.FIELD_SET); fineGrainedLineage.setDownstreamType(FineGrainedLineageDownstreamType.FIELD); - Assert.assertEquals(result.getFineGrainedLineages().get(0), fineGrainedLineage); + assertEquals(result.getFineGrainedLineages().get(0), fineGrainedLineage); // Test non-overwrite upstreams and correct confidence score and types w/ overwrite - ObjectNode finegrainedLineageNode2 = instance.objectNode(); - finegrainedLineageNode2.set( - "upstreamType", instance.textNode(FineGrainedLineageUpstreamType.FIELD_SET.name())); - finegrainedLineageNode2.set("confidenceScore", upstreamConfidenceScore); - finegrainedLineageNode2.set( - "downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD.name())); - JsonPatchOperation operation2 = - new AddOperation( - new JsonPointer( - 
"/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"), - finegrainedLineageNode2); - NumericNode upstreamConfidenceScore2 = instance.numberNode(0.1f); - ObjectNode finegrainedLineageNode3 = instance.objectNode(); - finegrainedLineageNode3.set( - "upstreamType", instance.textNode(FineGrainedLineageUpstreamType.DATASET.name())); - finegrainedLineageNode3.set("confidenceScore", upstreamConfidenceScore2); - finegrainedLineageNode3.set( - "downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD_SET.name())); - JsonPatchOperation operation3 = - new AddOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"), - finegrainedLineageNode3); - List patchOperations2 = new ArrayList<>(); - patchOperations2.add(operation2); - patchOperations2.add(operation3); - JsonPatch jsonPatch2 = new JsonPatch(patchOperations2); + JsonObjectBuilder finegrainedLineageNode2 = Json.createObjectBuilder(); + finegrainedLineageNode2.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name())); + finegrainedLineageNode2.add("confidenceScore", upstreamConfidenceScore); + finegrainedLineageNode2.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name())); + + JsonPatchBuilder patchOperations2 = Json.createPatchBuilder(); + patchOperations2.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode2.build()); + + JsonValue 
upstreamConfidenceScore2 = Json.createValue(0.1f); + JsonObjectBuilder finegrainedLineageNode3 = Json.createObjectBuilder(); + finegrainedLineageNode3.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.DATASET.name())); + finegrainedLineageNode3.add("confidenceScore", upstreamConfidenceScore2); + finegrainedLineageNode3.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD_SET.name())); + + patchOperations2.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode3.build()); + + JsonPatch jsonPatch2 = patchOperations2.build(); + UpstreamLineage result2 = upstreamLineageTemplate.applyPatch(result, jsonPatch2); // Hack because Jackson parses values to doubles instead of floats DataMap dataMap2 = new DataMap(); @@ -112,23 +115,22 @@ public void testPatchUpstream() throws Exception { fineGrainedLineage2.setUpstreamType(FineGrainedLineageUpstreamType.DATASET); fineGrainedLineage2.setDownstreamType(FineGrainedLineageDownstreamType.FIELD_SET); fineGrainedLineage2.setQuery(UrnUtils.getUrn("urn:li:query:someQuery")); - Assert.assertEquals(result2.getFineGrainedLineages().get(1), fineGrainedLineage2); + assertEquals(result2.getFineGrainedLineages().get(1), fineGrainedLineage2); // Check different queries - ObjectNode finegrainedLineageNode4 = instance.objectNode(); - finegrainedLineageNode4.set( - "upstreamType", instance.textNode(FineGrainedLineageUpstreamType.FIELD_SET.name())); - finegrainedLineageNode4.set("confidenceScore", upstreamConfidenceScore); - finegrainedLineageNode4.set( - "downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD.name())); - JsonPatchOperation operation4 = - new AddOperation( - new JsonPointer( - 
"/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"), - finegrainedLineageNode4); - List patchOperations3 = new ArrayList<>(); - patchOperations3.add(operation4); - JsonPatch jsonPatch3 = new JsonPatch(patchOperations3); + JsonObjectBuilder finegrainedLineageNode4 = Json.createObjectBuilder(); + finegrainedLineageNode4.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name())); + finegrainedLineageNode4.add("confidenceScore", upstreamConfidenceScore); + finegrainedLineageNode4.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name())); + + JsonPatchBuilder patchOperations3 = Json.createPatchBuilder(); + patchOperations3.add( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode4.build()); + + JsonPatch jsonPatch3 = patchOperations3.build(); UpstreamLineage result3 = upstreamLineageTemplate.applyPatch(result2, jsonPatch3); // Hack because Jackson parses values to doubles instead of floats DataMap dataMap3 = new DataMap(); @@ -152,23 +154,22 @@ public void testPatchUpstream() throws Exception { fineGrainedLineage3.setDownstreamType(FineGrainedLineageDownstreamType.FIELD); fineGrainedLineage3.setQuery(UrnUtils.getUrn("urn:li:query:anotherQuery")); // Splits into two for different types - Assert.assertEquals(result3.getFineGrainedLineages().get(2), fineGrainedLineage3); + assertEquals(result3.getFineGrainedLineages().get(2), fineGrainedLineage3); // Check different transform types - ObjectNode finegrainedLineageNode5 = instance.objectNode(); - finegrainedLineageNode5.set( - "upstreamType", 
instance.textNode(FineGrainedLineageUpstreamType.FIELD_SET.name())); - finegrainedLineageNode5.set("confidenceScore", upstreamConfidenceScore); - finegrainedLineageNode5.set( - "downstreamType", instance.textNode(FineGrainedLineageDownstreamType.FIELD.name())); - JsonPatchOperation operation5 = - new AddOperation( - new JsonPointer( - "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"), - finegrainedLineageNode5); - List patchOperations4 = new ArrayList<>(); - patchOperations4.add(operation5); - JsonPatch jsonPatch4 = new JsonPatch(patchOperations4); + JsonObjectBuilder finegrainedLineageNode5 = Json.createObjectBuilder(); + finegrainedLineageNode5.add( + "upstreamType", Json.createValue(FineGrainedLineageUpstreamType.FIELD_SET.name())); + finegrainedLineageNode5.add("confidenceScore", upstreamConfidenceScore); + finegrainedLineageNode5.add( + "downstreamType", Json.createValue(FineGrainedLineageDownstreamType.FIELD.name())); + + JsonPatchBuilder patchOperations4 = Json.createPatchBuilder(); + patchOperations4.add( + "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)", + finegrainedLineageNode5.build()); + JsonPatch jsonPatch4 = patchOperations4.build(); + UpstreamLineage result4 = upstreamLineageTemplate.applyPatch(result3, jsonPatch4); // Hack because Jackson parses values to doubles instead of floats DataMap dataMap4 = new DataMap(); @@ -181,33 +182,76 @@ public void testPatchUpstream() throws Exception { fineGrainedLineage4.setDownstreamType(FineGrainedLineageDownstreamType.FIELD); fineGrainedLineage4.setQuery(UrnUtils.getUrn("urn:li:query:anotherQuery")); // New entry in array because 
of new transformation type - Assert.assertEquals(result4.getFineGrainedLineages().get(3), fineGrainedLineage4); + assertEquals(result4.getFineGrainedLineages().get(3), fineGrainedLineage4); // Remove - JsonPatchOperation removeOperation = - new RemoveOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)")); - JsonPatchOperation removeOperation2 = - new RemoveOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)")); - JsonPatchOperation removeOperation3 = - new RemoveOperation( - new JsonPointer( - "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)")); - JsonPatchOperation removeOperation4 = - new RemoveOperation( - new JsonPointer( - "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)")); - - List removeOperations = new ArrayList<>(); - removeOperations.add(removeOperation); - removeOperations.add(removeOperation2); - removeOperations.add(removeOperation3); - removeOperations.add(removeOperation4); - JsonPatch removePatch = new JsonPatch(removeOperations); + JsonPatchBuilder removeOperations = Json.createPatchBuilder(); + removeOperations.remove( + 
"/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c1)/NONE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c1)"); + removeOperations.remove( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:someQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + removeOperations.remove( + "/fineGrainedLineages/CREATE/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + removeOperations.remove( + "/fineGrainedLineages/TRANSFORM/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_2,PROD),c2)/urn:li:query:anotherQuery/urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream_table_1,PROD),c2)"); + + JsonPatch removePatch = removeOperations.build(); UpstreamLineage finalResult = upstreamLineageTemplate.applyPatch(result4, removePatch); - Assert.assertEquals(finalResult, upstreamLineageTemplate.getDefault()); + assertEquals(finalResult, upstreamLineageTemplate.getDefault()); + } + + @Test + public void testLargePatchStandard() throws Exception { + // Load patch operations from fixture + String patchStr = + OBJECT_MAPPER + .readTree( + new GzipCompressorInputStream( + this.getClass() + .getResourceAsStream("/patch/large_upstream_lineage_mcp.json.gz"))) + .get("aspect") + .get("com.linkedin.pegasus2avro.mxe.GenericAspect") + .get("value") + .asText(); + + JsonPatchBuilder patchBuilder = + Json.createPatchBuilder(Json.createReader(new StringReader(patchStr)).readArray()); + + // Overall the patch is a no-op, adding change to assert difference after application + patchBuilder.remove( + 
"/upstreams/urn:li:dataset:(urn:li:dataPlatform:snowflake,road_curated_nrt.db_3134_dbo.lineitem,PROD)"); + + JsonPatch jsonPatch = patchBuilder.build(); + assertEquals(jsonPatch.toJsonArray().size(), 7491); + + // Load existing aspect + String aspectStr = + OBJECT_MAPPER + .readTree( + new GzipCompressorInputStream( + this.getClass() + .getResourceAsStream("/patch/large_upstream_lineage_aspect.json.gz"))) + .get("select") + .get(0) + .get("metadata") + .asText(); + UpstreamLineage upstreamLineage = + GenericRecordUtils.deserializeAspect( + ByteString.copyString(aspectStr, StandardCharsets.UTF_8), JSON, UpstreamLineage.class); + assertEquals(upstreamLineage.getUpstreams().size(), 188); + assertEquals(upstreamLineage.getFineGrainedLineages().size(), 607); + + // Apply patch standard + UpstreamLineageTemplate upstreamLineageTemplate = new UpstreamLineageTemplate(); + + long start = System.currentTimeMillis(); + UpstreamLineage result = upstreamLineageTemplate.applyPatch(upstreamLineage, jsonPatch); + long end = System.currentTimeMillis(); + assertTrue( + end - start < 10000, + String.format("Expected less then 10 seconds patch actual %s ms", end - start)); + + assertEquals(result.getUpstreams().size(), 187, "Expected 1 less upstream"); + assertEquals(result.getFineGrainedLineages().size(), 607); } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java index e1de7cf87ee18..60bbdba16374a 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCLSideEffectTest.java @@ -14,6 +14,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import 
org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -41,25 +44,27 @@ public void testCustomMCLSideEffect() { assertEquals( mclSideEffects, List.of( - new TestMCLSideEffect( - AspectPluginConfig.builder() - .className( - "com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffectTest$TestMCLSideEffect") - .supportedOperations(List.of("UPSERT")) - .enabled(true) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("chart") - .aspectName("chartInfo") - .build())) - .build()))); + new TestMCLSideEffect() + .setConfig( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.hooks.MCLSideEffectTest$TestMCLSideEffect") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("chart") + .aspectName("chartInfo") + .build())) + .build()))); } + @Getter + @Setter + @Accessors(chain = true) public static class TestMCLSideEffect extends MCLSideEffect { - public TestMCLSideEffect(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + public AspectPluginConfig config; @Override protected Stream applyMCLSideEffect( diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java index 70b8a2fe6de43..8e877d1d23aad 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MCPSideEffectTest.java @@ -14,6 +14,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -41,26 +44,28 @@ 
public void testCustomMCPSideEffect() { assertEquals( mcpSideEffects, List.of( - new MCPSideEffectTest.TestMCPSideEffect( - AspectPluginConfig.builder() - .className( - "com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffectTest$TestMCPSideEffect") - .supportedOperations(List.of("UPSERT")) - .enabled(true) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("dataset") - .aspectName("datasetKey") - .build())) - .build()))); + new MCPSideEffectTest.TestMCPSideEffect() + .setConfig( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.hooks.MCPSideEffectTest$TestMCPSideEffect") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("dataset") + .aspectName("datasetKey") + .build())) + .build()))); } + @Getter + @Setter + @Accessors(chain = true) public static class TestMCPSideEffect extends MCPSideEffect { - public TestMCPSideEffect(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + public AspectPluginConfig config; @Override protected Stream applyMCPSideEffect( diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java index 16ea003582b18..9722f64ec82a0 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/hooks/MutationPluginTest.java @@ -9,6 +9,9 @@ import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import java.util.List; import java.util.stream.Collectors; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -36,24 +39,26 @@ public void 
testCustomMutator() { assertEquals( mutators, List.of( - new TestMutator( - AspectPluginConfig.builder() - .className( - "com.linkedin.metadata.aspect.plugins.hooks.MutationPluginTest$TestMutator") - .supportedOperations(List.of("UPSERT")) - .enabled(true) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("*") - .aspectName("schemaMetadata") - .build())) - .build()))); + new TestMutator() + .setConfig( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.hooks.MutationPluginTest$TestMutator") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("*") + .aspectName("schemaMetadata") + .build())) + .build()))); } + @Getter + @Setter + @Accessors(chain = true) public static class TestMutator extends MutationHook { - public TestMutator(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + public AspectPluginConfig config; } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java index 879464d332169..2667467b39e19 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/plugins/validation/ValidatorPluginTest.java @@ -15,6 +15,9 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; +import lombok.Getter; +import lombok.Setter; +import lombok.experimental.Accessors; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -42,39 +45,42 @@ public void testCustomValidator() { assertEquals( validators, List.of( - new TestValidator( - AspectPluginConfig.builder() - .className( - 
"com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator") - .supportedOperations(List.of("UPSERT")) - .enabled(true) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("*") - .aspectName("status") - .build())) - .build()), - new TestValidator( - AspectPluginConfig.builder() - .className( - "com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator") - .supportedOperations(List.of("UPSERT")) - .enabled(true) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName("chart") - .aspectName("status") - .build())) - .build()))); + new TestValidator() + .setConfig( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("*") + .aspectName("status") + .build())) + .build()), + new TestValidator() + .setConfig( + AspectPluginConfig.builder() + .className( + "com.linkedin.metadata.aspect.plugins.validation.ValidatorPluginTest$TestValidator") + .supportedOperations(List.of("UPSERT")) + .enabled(true) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName("chart") + .aspectName("status") + .build())) + .build()))); } + @Getter + @Setter + @Accessors(chain = true) public static class TestValidator extends AspectPayloadValidator { - public TestValidator(AspectPluginConfig config) { - super(config); - } + public AspectPluginConfig config; @Override protected Stream validateProposedAspects( diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/CreateIfNotExistsValidatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/CreateIfNotExistsValidatorTest.java index 5ccc9ceb8d02c..c201c2b11925a 
100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/CreateIfNotExistsValidatorTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/CreateIfNotExistsValidatorTest.java @@ -51,7 +51,7 @@ public void init() { @Test public void testCreateIfEntityNotExistsSuccess() { - CreateIfNotExistsValidator test = new CreateIfNotExistsValidator(validatorConfig); + CreateIfNotExistsValidator test = new CreateIfNotExistsValidator().setConfig(validatorConfig); Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); Set exceptions = @@ -87,7 +87,7 @@ public void testCreateIfEntityNotExistsSuccess() { @Test public void testCreateIfEntityNotExistsFail() { - CreateIfNotExistsValidator test = new CreateIfNotExistsValidator(validatorConfig); + CreateIfNotExistsValidator test = new CreateIfNotExistsValidator().setConfig(validatorConfig); Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); ChangeMCP testItem = @@ -114,7 +114,7 @@ public void testCreateIfEntityNotExistsFail() { @Test public void testCreateIfNotExistsSuccess() { - CreateIfNotExistsValidator test = new CreateIfNotExistsValidator(validatorConfig); + CreateIfNotExistsValidator test = new CreateIfNotExistsValidator().setConfig(validatorConfig); Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); Set exceptions = @@ -138,7 +138,7 @@ public void testCreateIfNotExistsSuccess() { @Test public void testCreateIfNotExistsFail() { - CreateIfNotExistsValidator test = new CreateIfNotExistsValidator(validatorConfig); + CreateIfNotExistsValidator test = new CreateIfNotExistsValidator().setConfig(validatorConfig); Urn testEntityUrn = UrnUtils.getUrn("urn:li:chart:(looker,baz1)"); SystemAspect mockSystemAspect = mock(SystemAspect.class); diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java 
b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java index e654bb5002afc..841cbf5a77bec 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/PropertyDefinitionValidatorTest.java @@ -293,22 +293,23 @@ public void testCanChangeAllowedValueDescriptions() @Test public void testHardDeleteBlock() { PropertyDefinitionValidator test = - new PropertyDefinitionValidator( - AspectPluginConfig.builder() - .enabled(true) - .className(PropertyDefinitionValidator.class.getName()) - .supportedOperations(List.of("DELETE")) - .supportedEntityAspectNames( - List.of( - AspectPluginConfig.EntityAspectName.builder() - .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) - .aspectName(Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .build(), - AspectPluginConfig.EntityAspectName.builder() - .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) - .aspectName("structuredPropertyKey") - .build())) - .build()); + new PropertyDefinitionValidator() + .setConfig( + AspectPluginConfig.builder() + .enabled(true) + .className(PropertyDefinitionValidator.class.getName()) + .supportedOperations(List.of("DELETE")) + .supportedEntityAspectNames( + List.of( + AspectPluginConfig.EntityAspectName.builder() + .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) + .aspectName(Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) + .build(), + AspectPluginConfig.EntityAspectName.builder() + .entityName(STRUCTURED_PROPERTY_ENTITY_NAME) + .aspectName("structuredPropertyKey") + .build())) + .build()); assertEquals( test.validateProposed( diff --git a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java index 5d63d8c8ba5e7..77cf453f517be 100644 --- 
a/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/aspect/validators/StructuredPropertiesValidatorTest.java @@ -4,6 +4,9 @@ import com.linkedin.common.Status; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; import com.linkedin.metadata.aspect.validation.StructuredPropertiesValidator; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.structured.PrimitivePropertyValue; @@ -19,6 +22,9 @@ import com.linkedin.test.metadata.aspect.batch.TestMCP; import java.net.URISyntaxException; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.testng.Assert; import org.testng.annotations.Test; @@ -26,6 +32,9 @@ public class StructuredPropertiesValidatorTest { private static final EntityRegistry TEST_REGISTRY = new TestEntityRegistry(); + private static final Urn TEST_DATASET_URN = + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:datahub,Test,PROD)"); + @Test public void testValidateAspectNumberUpsert() throws URISyntaxException { Urn propertyUrn = @@ -268,4 +277,215 @@ propertyUrn, numberPropertyDef, new Status().setRemoved(true))) 1, "Should have raised exception for soft deleted definition"); } + + @Test + public void testValidateImmutableMutation() throws URISyntaxException { + Urn mutablePropertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.mutableProperty"); + StructuredPropertyDefinition mutablePropertyDef = + new StructuredPropertyDefinition() + .setImmutable(false) + .setValueType(Urn.createFromString("urn:li:type:datahub.number")) + .setAllowedValues( + new PropertyValueArray( + List.of( + new PropertyValue().setValue(PrimitivePropertyValue.create(30.0)), + new 
PropertyValue().setValue(PrimitivePropertyValue.create(60.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(90.0))))); + StructuredPropertyValueAssignment mutableAssignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn(mutablePropertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); + StructuredProperties mutablePayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(mutableAssignment)); + + Urn immutablePropertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.immutableProperty"); + StructuredPropertyDefinition immutablePropertyDef = + new StructuredPropertyDefinition() + .setImmutable(true) + .setValueType(Urn.createFromString("urn:li:type:datahub.number")) + .setAllowedValues( + new PropertyValueArray( + List.of( + new PropertyValue().setValue(PrimitivePropertyValue.create(30.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(60.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(90.0))))); + StructuredPropertyValueAssignment immutableAssignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn(immutablePropertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); + StructuredProperties immutablePayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(immutableAssignment)); + + // No previous values for either + boolean noPreviousValid = + StructuredPropertiesValidator.validateImmutable( + Stream.concat( + TestMCP.ofOneMCP(TEST_DATASET_URN, null, mutablePayload, TEST_REGISTRY) + .stream(), + TestMCP.ofOneMCP( + TEST_DATASET_URN, null, immutablePayload, TEST_REGISTRY) + .stream()) + .collect(Collectors.toSet()), + new MockAspectRetriever( + Map.of( + mutablePropertyUrn, + List.of(mutablePropertyDef), + immutablePropertyUrn, + List.of(immutablePropertyDef)))) + .count() + == 0; + Assert.assertTrue(noPreviousValid); + + // 
Unchanged values of previous (no issues with immutability) + boolean noChangeValid = + StructuredPropertiesValidator.validateImmutable( + Stream.concat( + TestMCP.ofOneMCP( + TEST_DATASET_URN, mutablePayload, mutablePayload, TEST_REGISTRY) + .stream(), + TestMCP.ofOneMCP( + TEST_DATASET_URN, immutablePayload, immutablePayload, TEST_REGISTRY) + .stream()) + .collect(Collectors.toSet()), + new MockAspectRetriever( + Map.of( + mutablePropertyUrn, + List.of(mutablePropertyDef), + immutablePropertyUrn, + List.of(immutablePropertyDef)))) + .count() + == 0; + Assert.assertTrue(noChangeValid); + + // invalid + StructuredPropertyValueAssignment immutableAssignment2 = + new StructuredPropertyValueAssignment() + .setPropertyUrn(immutablePropertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(60.0))); + StructuredProperties immutablePayload2 = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(immutableAssignment2)); + + List exceptions = + StructuredPropertiesValidator.validateImmutable( + Stream.concat( + TestMCP.ofOneMCP( + TEST_DATASET_URN, mutablePayload, mutablePayload, TEST_REGISTRY) + .stream(), + TestMCP.ofOneMCP( + TEST_DATASET_URN, immutablePayload, immutablePayload2, TEST_REGISTRY) + .stream()) + .collect(Collectors.toSet()), + new MockAspectRetriever( + Map.of( + mutablePropertyUrn, + List.of(mutablePropertyDef), + immutablePropertyUrn, + List.of(immutablePropertyDef)))) + .collect(Collectors.toList()); + + Assert.assertEquals(exceptions.size(), 1, "Expected rejected mutation of immutable property."); + Assert.assertEquals(exceptions.get(0).getExceptionKey().getKey(), TEST_DATASET_URN); + Assert.assertTrue( + exceptions.get(0).getMessage().contains("Cannot mutate an immutable property")); + } + + @Test + public void testValidateImmutableDelete() throws URISyntaxException { + final StructuredProperties emptyProperties = + new StructuredProperties().setProperties(new 
StructuredPropertyValueAssignmentArray()); + + Urn mutablePropertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.mutableProperty"); + StructuredPropertyDefinition mutablePropertyDef = + new StructuredPropertyDefinition() + .setImmutable(false) + .setValueType(Urn.createFromString("urn:li:type:datahub.number")) + .setAllowedValues( + new PropertyValueArray( + List.of( + new PropertyValue().setValue(PrimitivePropertyValue.create(30.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(60.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(90.0))))); + StructuredPropertyValueAssignment mutableAssignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn(mutablePropertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); + StructuredProperties mutablePayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(mutableAssignment)); + + Urn immutablePropertyUrn = + Urn.createFromString("urn:li:structuredProperty:io.acryl.immutableProperty"); + StructuredPropertyDefinition immutablePropertyDef = + new StructuredPropertyDefinition() + .setImmutable(true) + .setValueType(Urn.createFromString("urn:li:type:datahub.number")) + .setAllowedValues( + new PropertyValueArray( + List.of( + new PropertyValue().setValue(PrimitivePropertyValue.create(30.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(60.0)), + new PropertyValue().setValue(PrimitivePropertyValue.create(90.0))))); + StructuredPropertyValueAssignment immutableAssignment = + new StructuredPropertyValueAssignment() + .setPropertyUrn(immutablePropertyUrn) + .setValues(new PrimitivePropertyValueArray(PrimitivePropertyValue.create(30.0))); + StructuredProperties immutablePayload = + new StructuredProperties() + .setProperties(new StructuredPropertyValueAssignmentArray(immutableAssignment)); + + // Delete mutable, Delete with no-op for immutable allowed + boolean noPreviousValid = + 
StructuredPropertiesValidator.validateImmutable( + Stream.concat( + TestMCP.ofOneMCP( + TEST_DATASET_URN, mutablePayload, emptyProperties, TEST_REGISTRY) + .stream(), + TestMCP.ofOneMCP( + TEST_DATASET_URN, immutablePayload, immutablePayload, TEST_REGISTRY) + .stream()) + // set to DELETE + .map(i -> ((TestMCP) i).toBuilder().changeType(ChangeType.DELETE).build()) + .collect(Collectors.toSet()), + new MockAspectRetriever( + Map.of( + mutablePropertyUrn, + List.of(mutablePropertyDef), + immutablePropertyUrn, + List.of(immutablePropertyDef)))) + .count() + == 0; + Assert.assertTrue(noPreviousValid); + + // invalid (delete of mutable allowed, delete of immutable denied) + List exceptions = + StructuredPropertiesValidator.validateImmutable( + Stream.concat( + TestMCP.ofOneMCP( + TEST_DATASET_URN, mutablePayload, emptyProperties, TEST_REGISTRY) + .stream(), + TestMCP.ofOneMCP( + TEST_DATASET_URN, immutablePayload, emptyProperties, TEST_REGISTRY) + .stream()) + // set to DELETE + .map(i -> ((TestMCP) i).toBuilder().changeType(ChangeType.DELETE).build()) + .collect(Collectors.toSet()), + new MockAspectRetriever( + Map.of( + mutablePropertyUrn, + List.of(mutablePropertyDef), + immutablePropertyUrn, + List.of(immutablePropertyDef)))) + .collect(Collectors.toList()); + + Assert.assertEquals(exceptions.size(), 1, "Expected rejected delete of immutable property."); + Assert.assertEquals(exceptions.get(0).getExceptionKey().getKey(), TEST_DATASET_URN); + Assert.assertTrue( + exceptions.get(0).getMessage().contains("Cannot delete an immutable property")); + } } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java index 8589bc1639f5c..3f00cfeac7fb9 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/OpenApiSpecBuilderTest.java @@ -98,7 
+98,7 @@ public void testOpenApiSpecBuilder() throws Exception { new ConfigEntityRegistry( TestEntityProfile.class.getClassLoader().getResourceAsStream("entity-registry.yml")); MergedEntityRegistry er = new MergedEntityRegistry(configEntityRegistry); - new PluginEntityRegistryLoader(TestConstants.BASE_DIRECTORY, 1) + new PluginEntityRegistryLoader(TestConstants.BASE_DIRECTORY, 1, null) .withBaseRegistry(er) .start(true); diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java index 27227f133ab55..dd9f6a56428e0 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PatchEntityRegistryTest.java @@ -22,7 +22,8 @@ public void testEntityRegistryLoad() throws Exception, EntityRegistryException { + "/" + TestConstants.TEST_VERSION.toString(), TestConstants.TEST_REGISTRY, - TestConstants.TEST_VERSION); + TestConstants.TEST_VERSION, + null); Map entitySpecs = patchEntityRegistry.getEntitySpecs(); assertEquals(entitySpecs.values().size(), 1); @@ -64,7 +65,8 @@ public void testEntityRegistryWithKeyLoad() throws Exception, EntityRegistryExce DataSchemaFactory.getClassLoader(pluginLocation).stream().toList(), Paths.get("src/test_plugins/mycompany-full-model/0.0.1/entity-registry.yaml"), TestConstants.TEST_REGISTRY, - TestConstants.TEST_VERSION); + TestConstants.TEST_VERSION, + null); Map entitySpecs = patchEntityRegistry.getEntitySpecs(); assertEquals(entitySpecs.values().size(), 1); diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java index 13582696bde03..47c29405a774e 100644 --- 
a/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/registry/PluginEntityRegistryLoaderTest.java @@ -76,7 +76,7 @@ public AspectTemplateEngine getAspectTemplateEngine() { MergedEntityRegistry configEntityRegistry = new MergedEntityRegistry(baseEntityRegistry); PluginEntityRegistryLoader pluginEntityRegistryLoader = - new PluginEntityRegistryLoader(TestConstants.BASE_DIRECTORY, 60) + new PluginEntityRegistryLoader(TestConstants.BASE_DIRECTORY, 60, null) .withBaseRegistry(configEntityRegistry) .start(true); assertEquals(pluginEntityRegistryLoader.getPatchRegistries().size(), 1); @@ -171,7 +171,7 @@ public void testEntityRegistryWithGoodBase() throws FileNotFoundException, Inter MergedEntityRegistry mergedEntityRegistry = new MergedEntityRegistry(getBaseEntityRegistry()); PluginEntityRegistryLoader pluginEntityRegistryLoader = - new PluginEntityRegistryLoader(BASE_DIRECTORY, 60) + new PluginEntityRegistryLoader(BASE_DIRECTORY, 60, null) .withBaseRegistry(mergedEntityRegistry) .start(true); assertEquals(pluginEntityRegistryLoader.getPatchRegistries().size(), 1); @@ -216,7 +216,7 @@ public void testEntityRegistryVersioning() throws InterruptedException { String multiversionPluginDir = "src/test_plugins/"; PluginEntityRegistryLoader pluginEntityRegistryLoader = - new PluginEntityRegistryLoader(multiversionPluginDir, 60) + new PluginEntityRegistryLoader(multiversionPluginDir, 60, null) .withBaseRegistry(mergedEntityRegistry) .start(true); Map>> diff --git a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java b/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java index 20d01dc55934a..1e1efe4238187 100644 --- a/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java +++ b/entity-registry/src/test/java/com/linkedin/test/metadata/aspect/batch/TestMCP.java @@ -27,7 +27,7 @@ 
import lombok.Getter; import lombok.Setter; -@Builder +@Builder(toBuilder = true) @Getter public class TestMCP implements ChangeMCP { private static final String TEST_DATASET_URN = diff --git a/entity-registry/src/test/resources/patch/large_upstream_lineage_aspect.json.gz b/entity-registry/src/test/resources/patch/large_upstream_lineage_aspect.json.gz new file mode 100644 index 0000000000000..618e84efe274a Binary files /dev/null and b/entity-registry/src/test/resources/patch/large_upstream_lineage_aspect.json.gz differ diff --git a/entity-registry/src/test/resources/patch/large_upstream_lineage_mcp.json.gz b/entity-registry/src/test/resources/patch/large_upstream_lineage_mcp.json.gz new file mode 100644 index 0000000000000..0c9fa4f0f359c Binary files /dev/null and b/entity-registry/src/test/resources/patch/large_upstream_lineage_mcp.json.gz differ diff --git a/li-utils/src/main/java/com/datahub/util/RecordUtils.java b/li-utils/src/main/java/com/datahub/util/RecordUtils.java index d57875f79de61..8183ecc21ee27 100644 --- a/li-utils/src/main/java/com/datahub/util/RecordUtils.java +++ b/li-utils/src/main/java/com/datahub/util/RecordUtils.java @@ -463,7 +463,7 @@ private static Object invokeMethod(@Nonnull RecordTemplate record, @Nonnull Stri METHOD_CACHE.putIfAbsent(record.getClass(), getMethodsFromRecordTemplate(record)); try { return METHOD_CACHE.get(record.getClass()).get(fieldName).invoke(record); - } catch (IllegalAccessException | InvocationTargetException e) { + } catch (NullPointerException | IllegalAccessException | InvocationTargetException e) { throw new RuntimeException( String.format( "Failed to execute method for class [%s], field [%s]", diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index c200a4bc30d19..66ed48a428a21 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -358,6 
+358,10 @@ public class Constants { public static final String GLOBAL_SETTINGS_INFO_ASPECT_NAME = "globalSettingsInfo"; public static final Urn GLOBAL_SETTINGS_URN = Urn.createFromTuple(GLOBAL_SETTINGS_ENTITY_NAME, 0); + // Connection + public static final String DATAHUB_CONNECTION_ENTITY_NAME = "dataHubConnection"; + public static final String DATAHUB_CONNECTION_DETAILS_ASPECT_NAME = "dataHubConnectionDetails"; + // Relationships public static final String IS_MEMBER_OF_GROUP_RELATIONSHIP_NAME = "IsMemberOfGroup"; public static final String IS_MEMBER_OF_NATIVE_GROUP_RELATIONSHIP_NAME = "IsMemberOfNativeGroup"; diff --git a/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl b/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl index c5959ac4cc8fa..366843e460cb3 100644 --- a/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl +++ b/li-utils/src/main/pegasus/com/linkedin/common/FabricType.pdl @@ -54,4 +54,9 @@ enum FabricType { * Designates corporation fabrics */ CORP + + /** + * Designates review fabrics + */ + RVW } diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 90167126bc349..065e9454c5d9e 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -42,7 +42,7 @@ def get_long_description(): # We remain restrictive on the versions allowed here to prevent # us from being broken by backwards-incompatible changes in the # underlying package. 
- "openlineage-airflow>=1.2.0,<=1.7.0", + "openlineage-airflow>=1.2.0,<=1.12.0", }, } diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py index 197ae5298aa83..f91c77591d35b 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/_extractors.py @@ -59,6 +59,10 @@ def __init__(self): for operator in _sql_operator_overrides: self.task_to_extractor.extractors[operator] = GenericSqlExtractor + self.task_to_extractor.extractors[ + "BigQueryInsertJobOperator" + ] = BigQueryInsertJobOperatorExtractor + self._graph: Optional["DataHubGraph"] = None @contextlib.contextmanager @@ -78,7 +82,7 @@ def _patch_extractors(self): unittest.mock.patch.object( SnowflakeExtractor, "default_schema", - property(snowflake_default_schema), + property(_snowflake_default_schema), ) ) @@ -166,12 +170,6 @@ def _sql_extractor_extract(self: "SqlExtractor") -> TaskMetadata: task_name = f"{self.operator.dag_id}.{self.operator.task_id}" sql = self.operator.sql - run_facets = {} - job_facets = {"sql": SqlJobFacet(query=self._normalize_sql(sql))} - - # Prepare to run the SQL parser. - graph = self.context.get(_DATAHUB_GRAPH_CONTEXT_KEY, None) - default_database = getattr(self.operator, "database", None) if not default_database: default_database = self.database @@ -185,6 +183,31 @@ def _sql_extractor_extract(self: "SqlExtractor") -> TaskMetadata: # Run the SQL parser. 
scheme = self.scheme platform = OL_SCHEME_TWEAKS.get(scheme, scheme) + + return _parse_sql_into_task_metadata( + self, + sql, + platform=platform, + default_database=default_database, + default_schema=default_schema, + ) + + +def _parse_sql_into_task_metadata( + self: "BaseExtractor", + sql: str, + platform: str, + default_database: Optional[str], + default_schema: Optional[str], +) -> TaskMetadata: + task_name = f"{self.operator.dag_id}.{self.operator.task_id}" + + run_facets = {} + job_facets = {"sql": SqlJobFacet(query=self._normalize_sql(sql))} + + # Prepare to run the SQL parser. + graph = self.context.get(_DATAHUB_GRAPH_CONTEXT_KEY, None) + self.log.debug( "Running the SQL parser %s (platform=%s, default db=%s, schema=%s): %s", "with graph client" if graph else "in offline mode", @@ -232,7 +255,28 @@ def _sql_extractor_extract(self: "SqlExtractor") -> TaskMetadata: ) -def snowflake_default_schema(self: "SnowflakeExtractor") -> Optional[str]: +class BigQueryInsertJobOperatorExtractor(BaseExtractor): + def extract(self) -> Optional[TaskMetadata]: + from airflow.providers.google.cloud.operators.bigquery import ( + BigQueryInsertJobOperator, # type: ignore + ) + + operator: "BigQueryInsertJobOperator" = self.operator + sql = operator.configuration.get("query") + if not sql: + self.log.warning("No query found in BigQueryInsertJobOperator") + return None + + return _parse_sql_into_task_metadata( + self, + sql, + platform="bigquery", + default_database=operator.project_id, + default_schema=None, + ) + + +def _snowflake_default_schema(self: "SnowflakeExtractor") -> Optional[str]: if hasattr(self.operator, "schema") and self.operator.schema is not None: return self.operator.schema return ( diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py index 070fc0526eca3..d67754605c71b 100644 --- 
a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/client/airflow_generator.py @@ -420,6 +420,7 @@ def run_datajob( config: Optional[DatahubLineageConfig] = None, ) -> DataProcessInstance: if datajob is None: + assert ti.task is not None datajob = AirflowGenerator.generate_datajob( cluster, ti.task, dag, config=config ) @@ -428,8 +429,8 @@ def run_datajob( dpi = DataProcessInstance.from_datajob( datajob=datajob, id=f"{dag.dag_id}_{ti.task_id}_{dag_run.run_id}", - clone_inlets=True, - clone_outlets=True, + clone_inlets=config is None or config.materialize_iolets, + clone_outlets=config is None or config.materialize_iolets, ) job_property_bag: Dict[str, str] = {} job_property_bag["run_id"] = str(dag_run.run_id) @@ -509,6 +510,7 @@ def complete_datajob( :return: DataProcessInstance """ if datajob is None: + assert ti.task is not None datajob = AirflowGenerator.generate_datajob( cluster, ti.task, dag, config=config ) @@ -530,6 +532,7 @@ def complete_datajob( f"Result should be either success or failure and it was {ti.state}" ) + assert datajob is not None dpi = DataProcessInstance.from_datajob( datajob=datajob, id=f"{dag.dag_id}_{ti.task_id}_{dag_run.run_id}", diff --git a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py index ac5dc00e0e639..15f76a8b1e1d0 100644 --- a/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py +++ b/metadata-ingestion-modules/airflow-plugin/src/datahub_airflow_plugin/datahub_listener.py @@ -3,6 +3,7 @@ import logging import os import threading +import time from typing import TYPE_CHECKING, Callable, Dict, List, Optional, TypeVar, cast import airflow @@ -16,6 +17,8 @@ FineGrainedLineageClass, FineGrainedLineageDownstreamTypeClass, 
FineGrainedLineageUpstreamTypeClass, + OperationClass, + OperationTypeClass, StatusClass, ) from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult @@ -357,6 +360,7 @@ def on_task_instance_running( # The type ignore is to placate mypy on Airflow 2.1.x. dagrun: "DagRun" = task_instance.dag_run # type: ignore[attr-defined] task = task_instance.task + assert task is not None dag: "DAG" = task.dag # type: ignore[assignment] self._task_holder.set_task(task_instance) @@ -414,11 +418,37 @@ def on_task_instance_running( f"DataHub listener finished processing notification about task instance start for {task_instance.task_id}" ) + if self.config.materialize_iolets: + for outlet in datajob.outlets: + reported_time: int = int(time.time() * 1000) + operation = OperationClass( + timestampMillis=reported_time, + operationType=OperationTypeClass.CREATE, + lastUpdatedTimestamp=reported_time, + actor=builder.make_user_urn("airflow"), + ) + + operation_mcp = MetadataChangeProposalWrapper( + entityUrn=str(outlet), aspect=operation + ) + + self.emitter.emit(operation_mcp) + logger.debug(f"Emitted Dataset Operation: {outlet}") + else: + if self.graph: + for outlet in datajob.outlets: + if not self.graph.exists(str(outlet)): + logger.warning(f"Dataset {str(outlet)} not materialized") + for inlet in datajob.inlets: + if not self.graph.exists(str(inlet)): + logger.warning(f"Dataset {str(inlet)} not materialized") + def on_task_instance_finish( self, task_instance: "TaskInstance", status: InstanceRunResult ) -> None: dagrun: "DagRun" = task_instance.dag_run # type: ignore[attr-defined] task = self._task_holder.get_task(task_instance) or task_instance.task + assert task is not None dag: "DAG" = task.dag # type: ignore[assignment] datajob = AirflowGenerator.generate_datajob( diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json index 
7c52cbcddc13c..8b1bad5b55874 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets.json @@ -368,6 +368,42 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714671978982, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714671978982 + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714671978991, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714671978991 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", @@ -503,6 +539,7 @@ } } ], + "ownerTypes": {}, "lastModified": { "time": 0, "actor": "urn:li:corpuser:airflow" @@ -528,7 +565,7 @@ "aspectName": "dataProcessInstanceRunEvent", "aspect": { "json": { - "timestampMillis": 1701223417702, + "timestampMillis": 1714671979032, "partitionSpec": { "type": "FULL_TABLE", "partition": "FULL_TABLE_SNAPSHOT" diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json index 150f95d5171c7..589cd32ae3eb7 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json +++ 
b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_basic_iolets_no_dag_listener.json @@ -368,6 +368,42 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714676628119, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714676628119 + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableE,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714676628127, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714676628127 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,basic_iolets,prod),run_data_task)", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json index 0248ab0473c9e..653d8f7e30530 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag.json @@ -297,6 +297,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714671938600, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 
1714671938600 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json index 7860251fc22dc..da08d2addf7c9 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_simple_dag_no_dag_listener.json @@ -297,6 +297,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,mydb.schema.tableD,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714676586630, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714676586630 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,simple_dag,prod),task_1)", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json index 1bf0820c7cb41..331ecd353ba26 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_snowflake_operator.json @@ -328,6 +328,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,datahub_test_database.datahub_test_schema.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714672017187, + "partitionSpec": { + "type": "FULL_TABLE", + 
"partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714672017187 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,snowflake_operator,prod),transform_cost_table)", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json index 313abad9c5546..e85a07b194e4f 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator.json @@ -271,6 +271,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714672059338, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714672059338 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", @@ -635,6 +653,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714672062927, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714672062927 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", @@ -1022,6 +1058,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714672066747, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714672066747 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", diff --git a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json index 60beff71c46c6..47f7cdca68d49 100644 --- a/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json +++ b/metadata-ingestion-modules/airflow-plugin/tests/integration/goldens/v2_sqlite_operator_no_dag_listener.json @@ -271,6 +271,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714676666839, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714676666839 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),create_cost_table)", @@ -692,6 +710,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714676669640, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + 
"operationType": "CREATE", + "lastUpdatedTimestamp": 1714676669640 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),populate_cost_table)", @@ -1136,6 +1172,24 @@ } } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:sqlite,public.processed_costs,PROD)", + "changeType": "UPSERT", + "aspectName": "operation", + "aspect": { + "json": { + "timestampMillis": 1714676672665, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "actor": "urn:li:corpuser:airflow", + "operationType": "CREATE", + "lastUpdatedTimestamp": 1714676672665 + } + } +}, { "entityType": "dataJob", "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,sqlite_operator,prod),transform_cost_table)", diff --git a/metadata-ingestion/cli-ingestion.md b/metadata-ingestion/cli-ingestion.md index 48cc4ef09db91..b15dd2a501995 100644 --- a/metadata-ingestion/cli-ingestion.md +++ b/metadata-ingestion/cli-ingestion.md @@ -25,10 +25,10 @@ Check out the [alternative installation options](../docs/cli.md#alternate-instal ## Configuring a Recipe -Create a recipe.yml file that defines the source and sink for metadata, as shown below. +Create a `recipe.yml` file that defines the source and sink for metadata, as shown below. ```yaml -# my_reipe.yml +# recipe.yml source: type: config: @@ -48,7 +48,7 @@ For more information and examples on configuring recipes, please refer to [Recip You can run ingestion using `datahub ingest` like below. 
```shell -datahub ingest -c +datahub ingest -c ``` ## Reference diff --git a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md index 0825a8d61a66b..41b80bd72f029 100644 --- a/metadata-ingestion/docs/dev_guides/profiling_ingestions.md +++ b/metadata-ingestion/docs/dev_guides/profiling_ingestions.md @@ -88,7 +88,3 @@ This will generate an interactive HTML file for analysis: `memray` has an extensive set of features for memory investigation. Take a look at their [documentation](https://bloomberg.github.io/memray/overview.html) to see the full feature set. - -## Questions - -If you've got any questions on configuring profiling, feel free to ping us on [our Slack](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)! diff --git a/metadata-ingestion/docs/dev_guides/sql_profiles.md b/metadata-ingestion/docs/dev_guides/sql_profiles.md index d211ef1f3f473..994d6fe489331 100644 --- a/metadata-ingestion/docs/dev_guides/sql_profiles.md +++ b/metadata-ingestion/docs/dev_guides/sql_profiles.md @@ -28,6 +28,3 @@ Extracts: SQL profiling is supported for all SQL sources. Check the individual source page to verify if it supports profiling. -## Questions - -If you've got any questions on configuring profiling, feel free to ping us on [our Slack](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)! diff --git a/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md b/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md index 598d0ecdb3786..0e85ec7a8cc61 100644 --- a/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md +++ b/metadata-ingestion/docs/sources/dynamodb/dynamodb_pre.md @@ -1,5 +1,7 @@ ### Prerequisities +Notice of breaking change: in the latest version of the DynamoDB connector, `aws_region` is now a required configuration. 
The connector will no longer loop through all AWS regions; instead, it will only use the region passed into the recipe configuration. + In order to execute this source, you need to attach the `AmazonDynamoDBReadOnlyAccess` policy to a user in your AWS account. Then create an API access key and secret for the user. For a user to be able to create API access key, it needs the following access key permissions. Your AWS account admin can create a policy with these permissions and attach to the user, you can find more details in [Managing access keys for IAM users](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) diff --git a/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml b/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml index 4f4edc9a7d496..2b4149fa92aac 100644 --- a/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml +++ b/metadata-ingestion/docs/sources/dynamodb/dynamodb_recipe.yml @@ -4,6 +4,8 @@ source: platform_instance: "AWS_ACCOUNT_ID" aws_access_key_id: "${AWS_ACCESS_KEY_ID}" aws_secret_access_key: "${AWS_SECRET_ACCESS_KEY}" + aws_session_token: "${AWS_SESSION_TOKEN}" + aws_region: "${AWS_REGION}" # # If there are items that have most representative fields of the table, users could use the # `include_table_item` option to provide a list of primary keys of the table in dynamodb format. diff --git a/metadata-ingestion/docs/sources/hana/hana.md b/metadata-ingestion/docs/sources/hana/hana.md index 34f7df2e17441..53fff0e67f348 100644 --- a/metadata-ingestion/docs/sources/hana/hana.md +++ b/metadata-ingestion/docs/sources/hana/hana.md @@ -5,6 +5,3 @@ The implementation uses the [SQLAlchemy Dialect for SAP HANA](https://github.com Under the hood, [SQLAlchemy Dialect for SAP HANA](https://github.com/SAP/sqlalchemy-hana) uses the SAP HANA Python Driver hdbcli. Therefore it is compatible with HANA or HANA express versions since HANA SPS 2. 
-## Questions - -If you've got any questions on configuring this source, feel free to ping us on [our Slack](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)! diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index c0a8d31bca4c0..64d1438cfcc73 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -13,7 +13,7 @@ The below table shows transformer which can transform aspects of entity [Dataset | `glossaryTerms` | - [Simple Add Dataset glossaryTerms ](#simple-add-dataset-glossaryterms)
- [Pattern Add Dataset glossaryTerms](#pattern-add-dataset-glossaryterms) | | `schemaMetadata` | - [Pattern Add Dataset Schema Field glossaryTerms](#pattern-add-dataset-schema-field-glossaryterms)
- [Pattern Add Dataset Schema Field globalTags](#pattern-add-dataset-schema-field-globaltags) | | `datasetProperties` | - [Simple Add Dataset datasetProperties](#simple-add-dataset-datasetproperties)
- [Add Dataset datasetProperties](#add-dataset-datasetproperties) | -| `domains` | - [Simple Add Dataset domains](#simple-add-dataset-domains)
- [Pattern Add Dataset domains](#pattern-add-dataset-domains) | +| `domains` | - [Simple Add Dataset domains](#simple-add-dataset-domains)
- [Pattern Add Dataset domains](#pattern-add-dataset-domains)
- [Domain Mapping Based on Tags](#domain-mapping-based-on-tags) | | `dataProduct` | - [Simple Add Dataset dataProduct ](#simple-add-dataset-dataproduct)
- [Pattern Add Dataset dataProduct](#pattern-add-dataset-dataproduct)
- [Add Dataset dataProduct](#add-dataset-dataproduct) ## Extract Ownership from Tags @@ -925,6 +925,24 @@ transformers: replacement: "sub" ``` +## Clean User URN in DatasetUsageStatistics Aspect +### Config Details +| Field | Required | Type | Default | Description | +|-----------------------------|----------|---------|---------------|---------------------------------------------| +| `pattern_for_cleanup` | ✅ | list[string] | | List of suffix/prefix to remove from the Owner URN(s) | + + +Matches against a User URN in DatasetUsageStatistics aspect and remove the matching part from it +```yaml +transformers: + - type: "pattern_cleanup_dataset_usage_user" + config: + pattern_for_cleanup: + - "ABCDEF" + - (?<=_)(\w+) +``` + + ## Simple Add Dataset domains ### Config Details | Field | Required | Type | Default | Description | @@ -1046,6 +1064,61 @@ in both of the cases domain should be provisioned on DataHub GMS 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.n.*': ["hr"] 'urn:li:dataset:\(urn:li:dataPlatform:postgres,postgres\.public\.t.*': ["urn:li:domain:finance"] ``` + + + +## Domain Mapping Based on Tags +### Config Details + +| Field | Required | Type | Default | Description | +|-----------------|----------|-------------------------|-------------|---------------------------------------------------------------------------------------------------------| +| `domain_mapping`| ✅ | Dict[str, str] | | Dataset Entity tag as key and domain urn or name as value to map with dataset as asset. | +| `semantics` | | enum | "OVERWRITE" | Whether to OVERWRITE or PATCH the entity present on DataHub GMS.| + +
+ +let’s suppose we’d like to add domain to dataset based on tag, in this case you can use `domain_mapping_based_on_tags` transformer. + +The config, which we’d append to our ingestion recipe YAML, would look like this: + +Here we can set domains to either urn (i.e. urn:li:domain:engineering) or simple domain name (i.e. engineering) in both of the cases domain should be provisioned on DataHub GMS + +When specifying tags within the domain mapping, use the tag's simple name rather than the full tag URN. + +For example, instead of using the tag URN urn:li:tag:NeedsDocumentation, you should specify just the simple tag name NeedsDocumentation in the domain mapping configuration + +```yaml +transformers: + - type: "domain_mapping_based_on_tags" + config: + domain_mapping: + 'NeedsDocumentation': "urn:li:domain:documentation" +``` + + +`domain_mapping_based_on_tags` can be configured in below different way + +- Add domains based on tags, however overwrite the domains available for the dataset on DataHub GMS +```yaml + transformers: + - type: "domain_mapping_based_on_tags" + config: + semantics: OVERWRITE # OVERWRITE is default behaviour + domain_mapping: + 'example1': "urn:li:domain:engineering" + 'example2': "urn:li:domain:hr" + ``` +- Add domains based on tags, however keep the domains available for the dataset on DataHub GMS +```yaml + transformers: + - type: "domain_mapping_based_on_tags" + config: + semantics: PATCH + domain_mapping: + 'example1': "urn:li:domain:engineering" + 'example2': "urn:li:domain:hr" + ``` + ## Simple Add Dataset dataProduct ### Config Details | Field | Required | Type | Default | Description | diff --git a/metadata-ingestion/setup.cfg b/metadata-ingestion/setup.cfg index 25ece8ac11ef0..16af43abe3be7 100644 --- a/metadata-ingestion/setup.cfg +++ b/metadata-ingestion/setup.cfg @@ -94,6 +94,7 @@ filterwarnings = ignore:pkg_resources is deprecated as an API:DeprecationWarning ignore:Did not recognize type:sqlalchemy.exc.SAWarning 
ignore::datahub.configuration.pydantic_migration_helpers.PydanticDeprecatedSince20 + ignore::datahub.configuration.common.ConfigurationWarning [coverage:run] # Because of some quirks in the way setup.cfg, coverage.py, pytest-cov, diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 307e519cc9cc6..9d35b9b8cadf5 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -99,7 +99,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1 - "acryl-sqlglot==23.11.2.dev2", + "acryl-sqlglot[rs]==23.11.2.dev2", } classification_lib = { @@ -193,8 +193,7 @@ *sql_common, # https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350 "snowflake-sqlalchemy>=1.4.3", - # See https://github.com/snowflakedb/snowflake-connector-python/pull/1348 for why 2.8.2 is blocked - "snowflake-connector-python!=2.8.2", + "snowflake-connector-python>=3.4.0", "pandas", "cryptography", "msal", @@ -374,7 +373,9 @@ # It's technically wrong for packages to depend on setuptools. However, it seems mlflow does it anyways. 
"setuptools", }, - "mode": {"requests", "tenacity>=8.0.1"} | sqllineage_lib | sqlglot_lib, + "mode": {"requests", "python-liquid", "tenacity>=8.0.1"} + | sqllineage_lib + | sqlglot_lib, "mongodb": {"pymongo[srv]>=3.11", "packaging"}, "mssql": sql_common | mssql_common, "mssql-odbc": sql_common | mssql_common | {"pyodbc"}, @@ -706,6 +707,8 @@ "simple_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:SimpleAddDatasetDataProduct", "pattern_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:PatternAddDatasetDataProduct", "replace_external_url = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrl", + "pattern_cleanup_dataset_usage_user = datahub.ingestion.transformer.pattern_cleanup_dataset_usage_user:PatternCleanupDatasetUsageUser", + "domain_mapping_based_on_tags = datahub.ingestion.transformer.dataset_domain_based_on_tags:DatasetTagDomainMapper", ], "datahub.ingestion.sink.plugins": [ "file = datahub.ingestion.sink.file:FileSink", diff --git a/metadata-ingestion/sink_docs/console.md b/metadata-ingestion/sink_docs/console.md index f24a3086c6153..539f1c5a56a22 100644 --- a/metadata-ingestion/sink_docs/console.md +++ b/metadata-ingestion/sink_docs/console.md @@ -27,7 +27,3 @@ sink: ## Config details None! - -## Questions - -If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)! 
diff --git a/metadata-ingestion/sink_docs/datahub.md b/metadata-ingestion/sink_docs/datahub.md index 53b76ddd7288a..85bd8cc2f8531 100644 --- a/metadata-ingestion/sink_docs/datahub.md +++ b/metadata-ingestion/sink_docs/datahub.md @@ -28,6 +28,17 @@ sink: server: "http://localhost:8080" ``` +If you are connecting to a hosted Acryl instance, your sink will look like this: +```yml +source: + # source configs +sink: + type: "datahub-rest" + config: + server: "https://<your-domain-name>.acryl.io/gms" + token: <your-api-key> +``` + If you are running the ingestion in a container in docker and your [GMS is also running in docker](../../docker/README.md) then you should use the internal docker hostname of the GMS pod. Usually it would look something like ```yml diff --git a/metadata-ingestion/sink_docs/file.md b/metadata-ingestion/sink_docs/file.md index 7064a4dcfc8a5..2991afacbd93d 100644 --- a/metadata-ingestion/sink_docs/file.md +++ b/metadata-ingestion/sink_docs/file.md @@ -36,6 +36,3 @@ Note that a `.` is used to denote nested fields in the YAML recipe. | -------- | -------- | ------- | ------------------------- | | filename | ✅ | | Path to file to write to. | -## Questions - -If you've got any questions on configuring this sink, feel free to ping us on [our Slack](https://datahubproject.io/slack?utm_source=docs&utm_medium=docs&utm_campaign=docs_page_link)! 
diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py index acd708ee81a5c..cb2c536bbab20 100644 --- a/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py +++ b/metadata-ingestion/src/datahub/api/entities/datajob/dataflow.py @@ -80,9 +80,9 @@ def __post_init__(self): ) def generate_ownership_aspect(self): - owners = set([builder.make_user_urn(owner) for owner in self.owners]) | set( - [builder.make_group_urn(owner) for owner in self.group_owners] - ) + owners = {builder.make_user_urn(owner) for owner in self.owners} | { + builder.make_group_urn(owner) for owner in self.group_owners + } ownership = OwnershipClass( owners=[ OwnerClass( diff --git a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py index 0ad786d68643d..69cbcc4c3e45b 100644 --- a/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py +++ b/metadata-ingestion/src/datahub/api/entities/datajob/datajob.py @@ -70,9 +70,9 @@ def __post_init__(self): ) def generate_ownership_aspect(self) -> Iterable[OwnershipClass]: - owners = set([builder.make_user_urn(owner) for owner in self.owners]) | set( - [builder.make_group_urn(owner) for owner in self.group_owners] - ) + owners = {builder.make_user_urn(owner) for owner in self.owners} | { + builder.make_group_urn(owner) for owner in self.group_owners + } ownership = OwnershipClass( owners=[ OwnerClass( diff --git a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py index 61bda90447c62..408d6bc7256c6 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py +++ b/metadata-ingestion/src/datahub/api/entities/dataproduct/dataproduct.py @@ -276,7 +276,7 @@ def from_yaml( cls, file: Path, graph: DataHubGraph, - ) -> "DataProduct": + ) -> DataProduct: with open(file) as fp: 
yaml = YAML(typ="rt") # default, if not specfied, is 'rt' (round-trip) orig_dictionary = yaml.load(fp) @@ -291,7 +291,7 @@ def from_yaml( return parsed_data_product @classmethod - def from_datahub(cls, graph: DataHubGraph, id: str) -> "DataProduct": + def from_datahub(cls, graph: DataHubGraph, id: str) -> DataProduct: data_product_properties: Optional[ DataProductPropertiesClass ] = graph.get_aspect(id, DataProductPropertiesClass) @@ -384,7 +384,7 @@ def _patch_ownership( patches_drop[i] = o # Figure out what if any are new owners to add - new_owners_to_add = set(o for o in new_owner_type_map) - set(owners_matched) + new_owners_to_add = {o for o in new_owner_type_map} - set(owners_matched) if new_owners_to_add: for new_owner in new_owners_to_add: new_owner_type = new_owner_type_map[new_owner] diff --git a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py index 4e74a410b5f64..c71bced38f8aa 100644 --- a/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py +++ b/metadata-ingestion/src/datahub/api/entities/dataset/dataset.py @@ -242,7 +242,7 @@ def generate_mcp( if self.schema_metadata: if self.schema_metadata.file: - with open(self.schema_metadata.file, "r") as schema_fp: + with open(self.schema_metadata.file) as schema_fp: schema_string = schema_fp.read() schema_metadata = SchemaMetadataClass( schemaName=self.name or self.id or self.urn or "", @@ -377,8 +377,7 @@ def generate_mcp( type="COPY", ) ) - for patch_event in patch_builder.build(): - yield patch_event + yield from patch_builder.build() logger.info(f"Created dataset {self.urn}") diff --git a/metadata-ingestion/src/datahub/api/entities/forms/forms.py b/metadata-ingestion/src/datahub/api/entities/forms/forms.py index fd260e3171ed8..5ac08b6e64ed4 100644 --- a/metadata-ingestion/src/datahub/api/entities/forms/forms.py +++ b/metadata-ingestion/src/datahub/api/entities/forms/forms.py @@ -106,7 +106,7 @@ def create(file: str) -> 
None: emitter: DataHubGraph with get_default_graph() as emitter: - with open(file, "r") as fp: + with open(file) as fp: forms: List[dict] = yaml.safe_load(fp) for form_raw in forms: form = Forms.parse_obj(form_raw) @@ -204,7 +204,7 @@ def validate_prompts(self, emitter: DataHubGraph) -> List[FormPromptClass]: def upload_entities_for_form(self, emitter: DataHubGraph) -> Union[None, Exception]: if self.entities and self.entities.urns: formatted_entity_urns = ", ".join( - ['"{}"'.format(value) for value in self.entities.urns] + [f'"{value}"' for value in self.entities.urns] ) query = UPLOAD_ENTITIES_FOR_FORMS.format( form_urn=self.urn, entity_urns=formatted_entity_urns @@ -281,7 +281,7 @@ def add_owners(self, emitter: DataHubGraph) -> Union[None, Exception]: @staticmethod def format_form_filter(field: str, urns: List[str]) -> str: - formatted_urns = ", ".join(['"{}"'.format(urn) for urn in urns]) + formatted_urns = ", ".join([f'"{urn}"' for urn in urns]) return FIELD_FILTER_TEMPLATE.format(field=field, values=formatted_urns) @staticmethod diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py index ab40db5253fd1..ed97948de9034 100644 --- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py +++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py @@ -75,6 +75,7 @@ class StructuredProperties(ConfigModel): cardinality: Optional[str] = None allowed_values: Optional[List[AllowedValue]] = None type_qualifier: Optional[TypeQualifierAllowedTypes] = None + immutable: Optional[bool] = False @property def fqn(self) -> str: @@ -97,7 +98,7 @@ def create(file: str) -> None: emitter: DataHubGraph with get_default_graph() as emitter: - with open(file, "r") as fp: + with open(file) as fp: structuredproperties: List[dict] = yaml.safe_load(fp) for structuredproperty_raw in 
structuredproperties: structuredproperty = StructuredProperties.parse_obj( @@ -124,6 +125,7 @@ def create(file: str) -> None: for entity_type in structuredproperty.entity_types or [] ], cardinality=structuredproperty.cardinality, + immutable=structuredproperty.immutable, allowedValues=[ PropertyValueClass( value=v.value, description=v.description diff --git a/metadata-ingestion/src/datahub/cli/config_utils.py b/metadata-ingestion/src/datahub/cli/config_utils.py index 7877a6bf6df59..8cddc41551038 100644 --- a/metadata-ingestion/src/datahub/cli/config_utils.py +++ b/metadata-ingestion/src/datahub/cli/config_utils.py @@ -84,7 +84,7 @@ def ensure_datahub_config() -> None: def get_client_config(as_dict: bool = False) -> Union[Optional[DatahubConfig], dict]: - with open(DATAHUB_CONFIG_PATH, "r") as stream: + with open(DATAHUB_CONFIG_PATH) as stream: try: config_json = yaml.safe_load(stream) if as_dict: diff --git a/metadata-ingestion/src/datahub/cli/docker_check.py b/metadata-ingestion/src/datahub/cli/docker_check.py index b80c2f3df01da..ff3965455d163 100644 --- a/metadata-ingestion/src/datahub/cli/docker_check.py +++ b/metadata-ingestion/src/datahub/cli/docker_check.py @@ -203,7 +203,7 @@ def check_docker_quickstart() -> QuickstartStatus: all_containers = set() for config_file in config_files: - with open(config_file, "r") as config_file: + with open(config_file) as config_file: all_containers.update( yaml.safe_load(config_file).get("services", {}).keys() ) diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index e35d4a5c93c2d..707a9cab076e6 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -76,7 +76,7 @@ class Architectures(Enum): m2 = "m2" -@functools.lru_cache() +@functools.lru_cache def _docker_subprocess_env() -> Dict[str, str]: # platform.machine() is equivalent to `uname -m`, as per https://stackoverflow.com/a/45124927/5004662 
DOCKER_COMPOSE_PLATFORM: str = "linux/" + platform.machine() @@ -316,7 +316,7 @@ def _restore( assert os.path.exists( resolved_restore_file ), f"File {resolved_restore_file} does not exist" - with open(resolved_restore_file, "r") as fp: + with open(resolved_restore_file) as fp: result = subprocess.run( [ "bash", @@ -324,8 +324,7 @@ def _restore( f"docker exec -i {DOCKER_COMPOSE_PROJECT_NAME}-mysql-1 bash -c 'mysql -uroot -pdatahub datahub '", ], stdin=fp, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + capture_output=True, ) if result.returncode != 0: logger.error("Failed to run MySQL restore") @@ -381,7 +380,7 @@ def _restore( ) env_fp.flush() if logger.isEnabledFor(logging.DEBUG): - with open(env_fp.name, "r") as env_fp_reader: + with open(env_fp.name) as env_fp_reader: logger.debug(f"Env file contents: {env_fp_reader.read()}") # continue to issue the restore indices command @@ -401,8 +400,7 @@ def _restore( + "acryldata/datahub-upgrade:${DATAHUB_VERSION:-head}" + " -u RestoreIndices -a clean", ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + capture_output=True, ) logger.info( f"Index restore command finished with status {result.returncode}" diff --git a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 2e66b18e48145..453f1d2934372 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -588,6 +588,6 @@ def rollback( for row in unsafe_entities: writer.writerow([row.get("urn")]) - except IOError as e: + except OSError as e: logger.exception(f"Unable to save rollback failure report: {e}") sys.exit(f"Unable to write reports to {report_dir}") diff --git a/metadata-ingestion/src/datahub/cli/quickstart_versioning.py b/metadata-ingestion/src/datahub/cli/quickstart_versioning.py index 493869ac77bb8..9739af5127f4d 100644 --- a/metadata-ingestion/src/datahub/cli/quickstart_versioning.py +++ 
b/metadata-ingestion/src/datahub/cli/quickstart_versioning.py @@ -55,7 +55,7 @@ def fetch_quickstart_config(cls) -> "QuickstartVersionMappingConfig": "LOCAL_QUICKSTART_MAPPING_FILE is set, will try to read from local file." ) path = os.path.expanduser(LOCAL_QUICKSTART_MAPPING_FILE) - with open(path, "r") as f: + with open(path) as f: config_raw = yaml.safe_load(f) return cls.parse_obj(config_raw) @@ -70,7 +70,7 @@ def fetch_quickstart_config(cls) -> "QuickstartVersionMappingConfig": ) try: path = os.path.expanduser(DEFAULT_LOCAL_CONFIG_PATH) - with open(path, "r") as f: + with open(path) as f: config_raw = yaml.safe_load(f) except Exception: logger.debug("Couldn't read from local file either.") diff --git a/metadata-ingestion/src/datahub/configuration/common.py b/metadata-ingestion/src/datahub/configuration/common.py index 7aaa1706a6420..a5971258bcdaa 100644 --- a/metadata-ingestion/src/datahub/configuration/common.py +++ b/metadata-ingestion/src/datahub/configuration/common.py @@ -2,13 +2,24 @@ import unittest.mock from abc import ABC, abstractmethod from enum import auto -from typing import IO, Any, ClassVar, Dict, List, Optional, Type, TypeVar, Union +from typing import ( + IO, + Any, + ClassVar, + Dict, + List, + Optional, + Type, + TypeVar, + Union, + runtime_checkable, +) import pydantic from cached_property import cached_property from pydantic import BaseModel, Extra, ValidationError from pydantic.fields import Field -from typing_extensions import Protocol, runtime_checkable +from typing_extensions import Protocol from datahub.configuration._config_enum import ConfigEnum from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2 diff --git a/metadata-ingestion/src/datahub/configuration/git.py b/metadata-ingestion/src/datahub/configuration/git.py index 3c76c8da0d571..d237cd9ddd306 100644 --- a/metadata-ingestion/src/datahub/configuration/git.py +++ b/metadata-ingestion/src/datahub/configuration/git.py @@ -101,7 +101,7 @@ def 
deploy_key_filled_from_deploy_key_file( if v is None: deploy_key_file = values.get("deploy_key_file") if deploy_key_file is not None: - with open(deploy_key_file, "r") as fp: + with open(deploy_key_file) as fp: deploy_key = SecretStr(fp.read()) return deploy_key return v diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 4b982db2715c2..a792201f9defe 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -10,9 +10,9 @@ DEFAULT_ENV = FabricTypeClass.PROD # Get all the constants from the FabricTypeClass. It's not an enum, so this is a bit hacky but works. -ALL_ENV_TYPES: Set[str] = set( - [value for name, value in vars(FabricTypeClass).items() if not name.startswith("_")] -) +ALL_ENV_TYPES: Set[str] = { + value for name, value in vars(FabricTypeClass).items() if not name.startswith("_") +} class PlatformInstanceConfigMixin(ConfigModel): diff --git a/metadata-ingestion/src/datahub/emitter/request_helper.py b/metadata-ingestion/src/datahub/emitter/request_helper.py index 5263ba1912592..4e1ec026648b8 100644 --- a/metadata-ingestion/src/datahub/emitter/request_helper.py +++ b/metadata-ingestion/src/datahub/emitter/request_helper.py @@ -25,4 +25,4 @@ def make_curl_command( ), url, ] - return " ".join(shlex.quote(fragment) for fragment in fragments) + return shlex.join(fragments) diff --git a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py index f7ec22e1ec9c6..29e1f63dd452e 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py +++ b/metadata-ingestion/src/datahub/ingestion/api/incremental_lineage_helper.py @@ -4,6 +4,7 @@ from datahub.configuration.common import ConfigModel from datahub.emitter.mce_builder import datahub_guid, set_aspect +from 
datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.schema_classes import ( ChartInfoClass, @@ -105,21 +106,26 @@ def auto_incremental_lineage( for wu in stream: urn = wu.get_urn() - lineage_aspect: Optional[UpstreamLineageClass] = wu.get_aspect_of_type( - UpstreamLineageClass - ) if isinstance(wu.metadata, MetadataChangeEventClass): - set_aspect( - wu.metadata, None, UpstreamLineageClass - ) # we'll handle upstreamLineage separately below + lineage_aspect = wu.get_aspect_of_type(UpstreamLineageClass) + set_aspect(wu.metadata, None, UpstreamLineageClass) if len(wu.metadata.proposedSnapshot.aspects) > 0: yield wu - if lineage_aspect: + if lineage_aspect and lineage_aspect.upstreams: + yield convert_upstream_lineage_to_patch( + urn, lineage_aspect, wu.metadata.systemMetadata + ) + elif isinstance(wu.metadata, MetadataChangeProposalWrapper) and isinstance( + wu.metadata.aspect, UpstreamLineageClass + ): + lineage_aspect = wu.metadata.aspect if lineage_aspect.upstreams: yield convert_upstream_lineage_to_patch( urn, lineage_aspect, wu.metadata.systemMetadata ) + else: + yield wu class IncrementalLineageConfigMixin(ConfigModel): diff --git a/metadata-ingestion/src/datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py b/metadata-ingestion/src/datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py index 285ad9c088447..3680546d307d9 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py +++ b/metadata-ingestion/src/datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py @@ -26,9 +26,7 @@ class IngestionCheckpointingProviderBase(StatefulCommittable[CheckpointJobStates The base class for all checkpointing state provider implementations. 
""" - def __init__( - self, name: str, commit_policy: CommitPolicy = CommitPolicy.ON_NO_ERRORS - ): + def __init__(self, name: str, commit_policy: CommitPolicy = CommitPolicy.ALWAYS): # Set the initial state to an empty dict. super().__init__(name, commit_policy, {}) diff --git a/metadata-ingestion/src/datahub/ingestion/api/report.py b/metadata-ingestion/src/datahub/ingestion/api/report.py index 08b20d9e85691..4a74d6cbc6268 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/report.py +++ b/metadata-ingestion/src/datahub/ingestion/api/report.py @@ -5,12 +5,12 @@ from dataclasses import dataclass from datetime import datetime, timedelta from enum import Enum -from typing import Any, Optional +from typing import Any, Optional, runtime_checkable import humanfriendly import pydantic from pydantic import BaseModel -from typing_extensions import Literal, Protocol, runtime_checkable +from typing_extensions import Literal, Protocol from datahub.ingestion.api.report_helpers import format_datetime_relative from datahub.utilities.lossy_collections import LossyList diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 549f67f05abd7..f6755cb09d98e 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -37,6 +37,7 @@ ) from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent +from datahub.metadata.schema_classes import UpstreamLineageClass from datahub.utilities.lossy_collections import LossyDict, LossyList from datahub.utilities.type_annotations import get_class_from_annotation @@ -70,6 +71,9 @@ class SourceReport(Report): aspects: Dict[str, Dict[str, int]] = field( default_factory=lambda: defaultdict(lambda: defaultdict(int)) ) + aspect_urn_samples: Dict[str, Dict[str, LossyList[str]]] = field( + default_factory=lambda: defaultdict(lambda: 
defaultdict(LossyList)) + ) warnings: LossyDict[str, LossyList[str]] = field(default_factory=LossyDict) failures: LossyDict[str, LossyList[str]] = field(default_factory=LossyDict) @@ -96,6 +100,13 @@ def report_workunit(self, wu: WorkUnit) -> None: if aspectName is not None: # usually true self.aspects[entityType][aspectName] += 1 + self.aspect_urn_samples[entityType][aspectName].append(urn) + if isinstance(mcp.aspect, UpstreamLineageClass): + upstream_lineage = cast(UpstreamLineageClass, mcp.aspect) + if upstream_lineage.fineGrainedLineages: + self.aspect_urn_samples[entityType][ + "fineGrainedLineages" + ].append(urn) def report_warning(self, key: str, reason: str) -> None: warnings = self.warnings.get(key, LossyList()) diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/json_ref_patch.py b/metadata-ingestion/src/datahub/ingestion/extractor/json_ref_patch.py index daf43bd87ba60..2224a096f5387 100644 --- a/metadata-ingestion/src/datahub/ingestion/extractor/json_ref_patch.py +++ b/metadata-ingestion/src/datahub/ingestion/extractor/json_ref_patch.py @@ -15,7 +15,7 @@ def title_swapping_callback(self: JsonRef) -> dict: try: base_doc = self.loader(uri) except Exception as e: - raise self._error("%s: %s" % (e.__class__.__name__, str(e)), cause=e) from e + raise self._error(f"{e.__class__.__name__}: {str(e)}", cause=e) from e base_doc = _replace_refs( base_doc, **{**self._ref_kwargs, "base_uri": uri, "recursing": False} ) diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py index df0b732833fbe..d5af4f7a2389c 100644 --- a/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py +++ b/metadata-ingestion/src/datahub/ingestion/extractor/schema_util.py @@ -436,8 +436,7 @@ def gen_items_from_list_tuple_or_scalar( val: Any, ) -> Iterable[avro.schema.Schema]: if isinstance(val, (list, tuple)): - for i in val: - yield i + yield from val else: yield val diff --git 
a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 859b150757cdf..be3aa2e80780a 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -324,6 +324,7 @@ def get_ownership(self, entity_urn: str) -> Optional[OwnershipClass]: def get_schema_metadata(self, entity_urn: str) -> Optional[SchemaMetadataClass]: return self.get_aspect(entity_urn=entity_urn, aspect_type=SchemaMetadataClass) + @deprecated(reason="Use get_aspect directly.") def get_domain_properties(self, entity_urn: str) -> Optional[DomainPropertiesClass]: return self.get_aspect(entity_urn=entity_urn, aspect_type=DomainPropertiesClass) @@ -343,11 +344,9 @@ def get_glossary_terms(self, entity_urn: str) -> Optional[GlossaryTermsClass]: def get_domain(self, entity_urn: str) -> Optional[DomainsClass]: return self.get_aspect(entity_urn=entity_urn, aspect_type=DomainsClass) + @deprecated(reason="Use get_aspect directly.") def get_browse_path(self, entity_urn: str) -> Optional[BrowsePathsClass]: - return self.get_aspect( - entity_urn=entity_urn, - aspect_type=BrowsePathsClass, - ) + return self.get_aspect(entity_urn=entity_urn, aspect_type=BrowsePathsClass) def get_usage_aspects_from_urn( self, entity_urn: str, start_timestamp: int, end_timestamp: int @@ -419,26 +418,47 @@ def get_latest_timeseries_value( {"field": k, "value": v, "condition": "EQUAL"} for k, v in filter_criteria_map.items() ] + filter = {"or": [{"and": filter_criteria}]} + + values = self.get_timeseries_values( + entity_urn=entity_urn, aspect_type=aspect_type, filter=filter, limit=1 + ) + if not values: + return None + + assert len(values) == 1, len(values) + return values[0] + + def get_timeseries_values( + self, + entity_urn: str, + aspect_type: Type[Aspect], + filter: Dict[str, Any], + limit: int = 10, + ) -> List[Aspect]: query_body = { "urn": entity_urn, "entity": guess_entity_type(entity_urn), 
"aspect": aspect_type.ASPECT_NAME, - "limit": 1, - "filter": {"or": [{"and": filter_criteria}]}, + "limit": limit, + "filter": filter, } end_point = f"{self.config.server}/aspects?action=getTimeseriesAspectValues" resp: Dict = self._post_generic(end_point, query_body) - values: list = resp.get("value", {}).get("values") - if values: - assert len(values) == 1, len(values) - aspect_json: str = values[0].get("aspect", {}).get("value") + + values: Optional[List] = resp.get("value", {}).get("values") + aspects: List[Aspect] = [] + for value in values or []: + aspect_json: str = value.get("aspect", {}).get("value") if aspect_json: - return aspect_type.from_obj(json.loads(aspect_json), tuples=False) + aspects.append( + aspect_type.from_obj(json.loads(aspect_json), tuples=False) + ) else: raise GraphError( f"Failed to find {aspect_type} in response {aspect_json}" ) - return None + return aspects def get_entity_raw( self, entity_urn: str, aspects: Optional[List[str]] = None @@ -1074,7 +1094,7 @@ def delete_references_to_urn( related_aspects = response.get("relatedAspects", []) return reference_count, related_aspects - @functools.lru_cache() + @functools.lru_cache def _make_schema_resolver( self, platform: str, diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py b/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py index 95ca10045f1bb..ba358d2465bbc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/aws_common.py @@ -14,6 +14,7 @@ from datahub.configuration.source_common import EnvConfigMixin if TYPE_CHECKING: + from mypy_boto3_dynamodb import DynamoDBClient from mypy_boto3_glue import GlueClient from mypy_boto3_s3 import S3Client, S3ServiceResource from mypy_boto3_sagemaker import SageMakerClient @@ -214,6 +215,9 @@ def get_s3_resource( def get_glue_client(self) -> "GlueClient": return self.get_session().client("glue", config=self._aws_config()) + 
def get_dynamodb_client(self) -> "DynamoDBClient": + return self.get_session().client("dynamodb", config=self._aws_config()) + def get_sagemaker_client(self) -> "SageMakerClient": return self.get_session().client("sagemaker", config=self._aws_config()) @@ -224,6 +228,7 @@ class AwsSourceConfig(EnvConfigMixin, AwsConnectionConfig): Currently used by: - Glue source + - DynamoDB source - SageMaker source """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py index 0ac13b256eb03..a6393aa9d0ced 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/glue.py @@ -1,3 +1,4 @@ +import json import logging from collections import defaultdict from dataclasses import dataclass, field as dataclass_field @@ -98,6 +99,7 @@ UpstreamClass, UpstreamLineageClass, ) +from datahub.utilities.delta import delta_type_to_hive_type from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column logger = logging.getLogger(__name__) @@ -161,6 +163,10 @@ class GlueSourceConfig( stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field( default=None, description="" ) + extract_delta_schema_from_parameters: Optional[bool] = Field( + default=False, + description="If enabled, delta schemas can be alternatively fetched from table parameters.", + ) def is_profiling_enabled(self) -> bool: return self.profiling is not None and is_profiling_enabled( @@ -204,6 +210,8 @@ class GlueSourceReport(StaleEntityRemovalSourceReport): num_job_script_failed_parsing: int = 0 num_job_without_nodes: int = 0 num_dataset_to_dataset_edges_in_job: int = 0 + num_dataset_invalid_delta_schema: int = 0 + num_dataset_valid_delta_schema: int = 0 def report_table_scanned(self) -> None: self.tables_scanned += 1 @@ -1009,7 +1017,7 @@ def _transform_extraction(self) -> Iterable[MetadataWorkUnit]: # in Glue, it's possible for two buckets to 
have files of different extensions # if this happens, we append the extension in the URN so the sources can be distinguished # see process_dataflow_node() for details - s3_formats: DefaultDict[str, Set[Optional[str]]] = defaultdict(lambda: set()) + s3_formats: DefaultDict[str, Set[Optional[str]]] = defaultdict(set) for dag in dags.values(): if dag is not None: for s3_name, extension in self.get_dataflow_s3_names(dag): @@ -1147,10 +1155,41 @@ def get_s3_tags() -> Optional[GlobalTagsClass]: ) return new_tags + def _is_delta_schema( + provider: str, num_parts: int, columns: Optional[List[Mapping[str, Any]]] + ) -> bool: + return ( + (self.source_config.extract_delta_schema_from_parameters is True) + and (provider == "delta") + and (num_parts > 0) + and (columns is not None) + and (len(columns) == 1) + and (columns[0].get("Name", "") == "col") + and (columns[0].get("Type", "") == "array") + ) + def get_schema_metadata() -> Optional[SchemaMetadata]: - if not table.get("StorageDescriptor"): + # As soon as the hive integration with Spark is correctly providing the schema as expected in the + # StorageProperties, the alternative path to fetch schema from table parameters for delta schemas can be removed. 
+ # https://github.com/delta-io/delta/pull/2310 + provider = table.get("Parameters", {}).get("spark.sql.sources.provider", "") + num_parts = int( + table.get("Parameters", {}).get( + "spark.sql.sources.schema.numParts", "0" + ) + ) + columns = table.get("StorageDescriptor", {}).get("Columns", [{}]) + + if _is_delta_schema(provider, num_parts, columns): + return _get_delta_schema_metadata() + + elif table.get("StorageDescriptor"): + return _get_glue_schema_metadata() + + else: return None + def _get_glue_schema_metadata() -> Optional[SchemaMetadata]: schema = table["StorageDescriptor"]["Columns"] fields: List[SchemaField] = [] for field in schema: @@ -1183,6 +1222,51 @@ def get_schema_metadata() -> Optional[SchemaMetadata]: platformSchema=MySqlDDL(tableSchema=""), ) + def _get_delta_schema_metadata() -> Optional[SchemaMetadata]: + assert ( + table["Parameters"]["spark.sql.sources.provider"] == "delta" + and int(table["Parameters"]["spark.sql.sources.schema.numParts"]) > 0 + ) + + try: + numParts = int(table["Parameters"]["spark.sql.sources.schema.numParts"]) + schema_str = "".join( + [ + table["Parameters"][f"spark.sql.sources.schema.part.{i}"] + for i in range(numParts) + ] + ) + schema_json = json.loads(schema_str) + fields: List[SchemaField] = [] + for field in schema_json["fields"]: + field_type = delta_type_to_hive_type(field.get("type", "unknown")) + schema_fields = get_schema_fields_for_hive_column( + hive_column_name=field["name"], + hive_column_type=field_type, + description=field.get("description"), + default_nullable=bool(field.get("nullable", True)), + ) + assert schema_fields + fields.extend(schema_fields) + + self.report.num_dataset_valid_delta_schema += 1 + return SchemaMetadata( + schemaName=table_name, + version=0, + fields=fields, + platform=f"urn:li:dataPlatform:{self.platform}", + hash="", + platformSchema=MySqlDDL(tableSchema=""), + ) + + except Exception as e: + self.report_warning( + dataset_urn, + f"Could not parse schema for {table_name} 
because of {type(e).__name__}: {e}", + ) + self.report.num_dataset_invalid_delta_schema += 1 + return None + def get_data_platform_instance() -> DataPlatformInstanceClass: return DataPlatformInstanceClass( platform=make_data_platform_urn(self.platform), diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/job_classes.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/job_classes.py index 442c5eb2e0a8f..6e0e352db4af7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/job_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/job_classes.py @@ -1,6 +1,4 @@ -from typing import Dict - -from typing_extensions import Final +from typing import Dict, Final from datahub.metadata.schema_classes import JobStatusClass diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index eb59d720f1372..eecc0f4372969 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -178,6 +178,8 @@ def cleanup(config: BigQueryV2Config) -> None: ) class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource): # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types + # Note: We use the hive schema parser to parse nested BigQuery types. We also have + # some extra type mappings in that file. 
BIGQUERY_FIELD_TYPE_MAPPINGS: Dict[ str, Type[ @@ -221,7 +223,7 @@ class BigqueryV2Source(StatefulIngestionSourceBase, TestableSource): } def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): - super(BigqueryV2Source, self).__init__(config, ctx) + super().__init__(config, ctx) self.config: BigQueryV2Config = config self.report: BigQueryV2Report = BigQueryV2Report() self.classification_handler = ClassificationHandler(self.config, self.report) @@ -259,7 +261,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): self.lineage_extractor = BigqueryLineageExtractor( config, self.report, - dataset_urn_builder=self.gen_dataset_urn_from_ref, + dataset_urn_builder=self.gen_dataset_urn_from_raw_ref, redundant_run_skip_handler=redundant_lineage_run_skip_handler, ) @@ -276,7 +278,7 @@ def __init__(self, ctx: PipelineContext, config: BigQueryV2Config): config, self.report, schema_resolver=self.sql_parser_schema_resolver, - dataset_urn_builder=self.gen_dataset_urn_from_ref, + dataset_urn_builder=self.gen_dataset_urn_from_raw_ref, redundant_run_skip_handler=redundant_usage_run_skip_handler, ) @@ -338,7 +340,7 @@ def metadata_read_capability_test( ) -> CapabilityReport: for project_id in project_ids: try: - logger.info((f"Metadata read capability test for project {project_id}")) + logger.info(f"Metadata read capability test for project {project_id}") client: bigquery.Client = config.get_bigquery_client() assert client bigquery_data_dictionary = BigQuerySchemaApi( @@ -1187,14 +1189,28 @@ def gen_tags_aspect_workunit( entityUrn=dataset_urn, aspect=tags ).as_workunit() - def gen_dataset_urn(self, project_id: str, dataset_name: str, table: str) -> str: + def gen_dataset_urn( + self, project_id: str, dataset_name: str, table: str, use_raw_name: bool = False + ) -> str: datahub_dataset_name = BigqueryTableIdentifier(project_id, dataset_name, table) return make_dataset_urn( self.platform, - str(datahub_dataset_name), + ( + str(datahub_dataset_name) + if not 
use_raw_name + else datahub_dataset_name.raw_table_name() + ), self.config.env, ) + def gen_dataset_urn_from_raw_ref(self, ref: BigQueryTableRef) -> str: + return self.gen_dataset_urn( + ref.table_identifier.project_id, + ref.table_identifier.dataset, + ref.table_identifier.table, + use_raw_name=True, + ) + def gen_dataset_urn_from_ref(self, ref: BigQueryTableRef) -> str: return self.gen_dataset_urn( ref.table_identifier.project_id, @@ -1264,7 +1280,6 @@ def gen_schema_fields(self, columns: List[BigqueryColumn]) -> List[SchemaField]: type=SchemaFieldDataType( self.BIGQUERY_FIELD_TYPE_MAPPINGS.get(col.data_type, NullType)() ), - # NOTE: nativeDataType will not be in sync with older connector nativeDataType=col.data_type, description=col.comment, nullable=col.is_nullable, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index d918782691c77..ca09496eda341 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -275,7 +275,7 @@ def _make_bigquery_table( table.get("last_altered") / 1000, tz=timezone.utc ) if table.get("last_altered") is not None - else table.created, + else None, size_in_bytes=table.get("bytes"), rows_count=table.get("row_count"), comment=table.comment, @@ -339,7 +339,7 @@ def _make_bigquery_view(view: bigquery.Row) -> BigqueryView: view.get("last_altered") / 1000, tz=timezone.utc ) if view.get("last_altered") is not None - else view.created, + else None, comment=view.comment, view_definition=view.view_definition, materialized=view.table_type == BigqueryTableType.MATERIALIZED_VIEW, @@ -487,7 +487,7 @@ def _make_bigquery_table_snapshot(snapshot: bigquery.Row) -> BigqueryTableSnapsh snapshot.get("last_altered") / 1000, tz=timezone.utc ) if snapshot.get("last_altered") is not None - else snapshot.created, + else 
None, comment=snapshot.comment, ddl=snapshot.ddl, snapshot_time=snapshot.snapshot_time, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index c8c1e7c893c6c..c41207ec67f62 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -551,22 +551,20 @@ def lineage_via_catalog_lineage_api( # Only builds lineage map when the table has upstreams logger.debug("Found %d upstreams for table %s", len(upstreams), table) if upstreams: - lineage_map[destination_table_str] = set( - [ - LineageEdge( - table=str( - BigQueryTableRef( - table_identifier=BigqueryTableIdentifier.from_string_name( - source_table - ) + lineage_map[destination_table_str] = { + LineageEdge( + table=str( + BigQueryTableRef( + table_identifier=BigqueryTableIdentifier.from_string_name( + source_table ) - ), - column_mapping=frozenset(), - auditStamp=curr_date, - ) - for source_table in upstreams - ] - ) + ) + ), + column_mapping=frozenset(), + auditStamp=curr_date, + ) + for source_table in upstreams + } return lineage_map except Exception as e: self.error( diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index 29f7f786b0a49..84547efe37a62 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -48,6 +48,7 @@ class BIContainerSubTypes(str, Enum): QLIK_APP = "Qlik App" SIGMA_WORKSPACE = "Sigma Workspace" SIGMA_WORKBOOK = "Sigma Workbook" + MODE_COLLECTION = "Collection" class JobContainerSubTypes(str, Enum): @@ -64,3 +65,8 @@ class BIAssetSubTypes(str, Enum): # PowerBI POWERBI_TILE = "PowerBI Tile" POWERBI_PAGE = "PowerBI Page" + + # Mode + MODE_REPORT = "Report" + MODE_QUERY = "Query" + MODE_CHART = 
"Chart" diff --git a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py index ec3d1715aaece..d998c37d32ed2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py +++ b/metadata-ingestion/src/datahub/ingestion/source/csv_enricher.py @@ -154,9 +154,7 @@ def get_resource_glossary_terms_work_unit( # If we want to overwrite or there are no existing terms, create a new GlossaryTerms object current_terms = GlossaryTermsClass(term_associations, get_audit_stamp()) else: - current_term_urns: Set[str] = set( - [term.urn for term in current_terms.terms] - ) + current_term_urns: Set[str] = {term.urn for term in current_terms.terms} term_associations_filtered: List[GlossaryTermAssociationClass] = [ association for association in term_associations @@ -192,7 +190,7 @@ def get_resource_tags_work_unit( # If we want to overwrite or there are no existing tags, create a new GlobalTags object current_tags = GlobalTagsClass(tag_associations) else: - current_tag_urns: Set[str] = set([tag.tag for tag in current_tags.tags]) + current_tag_urns: Set[str] = {tag.tag for tag in current_tags.tags} tag_associations_filtered: List[TagAssociationClass] = [ association for association in tag_associations @@ -453,9 +451,9 @@ def process_sub_resource_row( field_match = True if has_terms: if field_info.glossaryTerms and not self.should_overwrite: - current_term_urns = set( - [term.urn for term in field_info.glossaryTerms.terms] - ) + current_term_urns = { + term.urn for term in field_info.glossaryTerms.terms + } term_associations_filtered = [ association for association in term_associations @@ -472,9 +470,9 @@ def process_sub_resource_row( if has_tags: if field_info.globalTags and not self.should_overwrite: - current_tag_urns = set( - [tag.tag for tag in field_info.globalTags.tags] - ) + current_tag_urns = { + tag.tag for tag in field_info.globalTags.tags + } tag_associations_filtered = [ association 
for association in tag_associations @@ -631,9 +629,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: f"Cannot read remote file {self.config.filename}, error:{e}" ) else: - with open( - pathlib.Path(self.config.filename), mode="r", encoding="utf-8-sig" - ) as f: + with open(pathlib.Path(self.config.filename), encoding="utf-8-sig") as f: rows = list(csv.DictReader(f, delimiter=self.config.delimiter)) for row in rows: diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py index b04718a9eabba..5393dd4835d8c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py @@ -58,8 +58,7 @@ def create_emit_containers( ) self.processed_containers.append(container_key.guid()) logger.debug(f"Creating container with key: {container_key}") - for wu in container_wus: - yield wu + yield from container_wus def gen_folder_key(self, abs_path): return FolderKey( diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py index 2c5a4b7af8836..820d85b2cfb51 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_cloud.py @@ -375,7 +375,7 @@ def _parse_into_dbt_node(self, node: Dict) -> DBTNode: max_loaded_at = None columns = [] - if "columns" in node: + if "columns" in node and node["columns"] is not None: # columns will be empty for ephemeral models columns = [ self._parse_into_dbt_column(column) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index b2d93b2e0fd6f..3b686ef60de29 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py 
+++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -795,14 +795,17 @@ def make_mapping_upstream_lineage( def get_column_type( - report: DBTSourceReport, dataset_name: str, column_type: str, dbt_adapter: str + report: DBTSourceReport, + dataset_name: str, + column_type: Optional[str], + dbt_adapter: str, ) -> SchemaFieldDataType: """ Maps known DBT types to datahub types """ - TypeClass: Any = _field_type_mapping.get(column_type) + TypeClass: Any = _field_type_mapping.get(column_type) if column_type else None - if TypeClass is None: + if TypeClass is None and column_type: # resolve a modified type if dbt_adapter == "trino": TypeClass = resolve_trino_modified_type(column_type) @@ -934,12 +937,14 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: def _make_data_platform_instance_aspect(self) -> DataPlatformInstanceClass: return DataPlatformInstanceClass( platform=mce_builder.make_data_platform_urn(DBT_PLATFORM), - instance=mce_builder.make_dataplatform_instance_urn( - mce_builder.make_data_platform_urn(DBT_PLATFORM), - self.config.platform_instance, - ) - if self.config.platform_instance - else None, + instance=( + mce_builder.make_dataplatform_instance_urn( + mce_builder.make_data_platform_urn(DBT_PLATFORM), + self.config.platform_instance, + ) + if self.config.platform_instance + else None + ), ) def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: @@ -1121,24 +1126,25 @@ def _infer_schemas_and_update_cll( # noqa: C901 elif node.compiled_code: try: # Add CTE stops based on the upstreams list. 
+ cte_mapping = { + cte_name: upstream_node.get_fake_ephemeral_table_name() + for upstream_node in [ + all_nodes_map[upstream_node_name] + for upstream_node_name in node.upstream_nodes + if upstream_node_name in all_nodes_map + ] + if upstream_node.is_ephemeral_model() + for cte_name in _get_dbt_cte_names( + upstream_node.name, schema_resolver.platform + ) + } preprocessed_sql = detach_ctes( parse_statements_and_pick( node.compiled_code, platform=schema_resolver.platform, ), platform=schema_resolver.platform, - cte_mapping={ - cte_name: upstream_node.get_fake_ephemeral_table_name() - for upstream_node in [ - all_nodes_map[upstream_node_name] - for upstream_node_name in node.upstream_nodes - if upstream_node_name in all_nodes_map - ] - if upstream_node.is_ephemeral_model() - for cte_name in _get_dbt_cte_names( - upstream_node.name, schema_resolver.platform - ) - }, + cte_mapping=cte_mapping, ) except Exception as e: self.report.sql_parser_detach_ctes_failures.append(node.dbt_name) @@ -1809,9 +1815,9 @@ def _translate_dbt_name_to_upstream_urn(dbt_name: str) -> str: ) for upstream in upstream_urns ], - fineGrainedLineages=(cll or None) - if self.config.include_column_lineage - else None, + fineGrainedLineages=( + (cll or None) if self.config.include_column_lineage else None + ), ) # This method attempts to read-modify and return the owners of a dataset. 
@@ -1848,7 +1854,7 @@ def get_transformed_tags_by_prefix( entity_urn: str, tags_prefix_filter: str, ) -> List[TagAssociationClass]: - tag_set = set([new_tag.tag for new_tag in new_tags]) + tag_set = {new_tag.tag for new_tag in new_tags} if self.ctx.graph: existing_tags_class = self.ctx.graph.get_tags(entity_urn) @@ -1863,7 +1869,7 @@ def get_transformed_tags_by_prefix( def get_transformed_terms( self, new_terms: List[GlossaryTermAssociation], entity_urn: str ) -> List[GlossaryTermAssociation]: - term_id_set = set([term.urn for term in new_terms]) + term_id_set = {term.urn for term in new_terms} if self.ctx.graph: existing_terms_class = self.ctx.graph.get_glossary_terms(entity_urn) if existing_terms_class and existing_terms_class.terms: diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py index 0fc35ddd281c8..c78cfdf0b4f0f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_core.py @@ -118,7 +118,7 @@ def get_columns( # information from the manifest file. 
logger.debug(f"Inferring schema info for {dbt_name} from manifest") catalog_columns = { - k: {"name": col["name"], "type": col["data_type"], "index": i} + k: {"name": col["name"], "type": col["data_type"] or "", "index": i} for i, (k, col) in enumerate(manifest_columns.items()) } else: @@ -481,7 +481,7 @@ def load_file_as_json( ) return json.loads(response["Body"].read().decode("utf-8")) else: - with open(uri, "r") as f: + with open(uri) as f: return json.load(f) def loadManifestAndCatalog( diff --git a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py index 39066b0c26553..6a52d8fdd8905 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py @@ -2,7 +2,7 @@ import logging import os import time -from typing import Any, Dict, Iterable, List +from typing import Dict, Iterable, List from urllib.parse import urlparse from deltalake import DeltaTable @@ -51,6 +51,7 @@ SchemaFieldClass, ) from datahub.telemetry import telemetry +from datahub.utilities.delta import delta_type_to_hive_type from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column logging.getLogger("py4j").setLevel(logging.ERROR) @@ -126,46 +127,12 @@ def create(cls, config_dict: dict, ctx: PipelineContext) -> "Source": config = DeltaLakeSourceConfig.parse_obj(config_dict) return cls(config, ctx) - def delta_type_to_hive_type(self, field_type: Any) -> str: - if isinstance(field_type, str): - """ - return the field type - """ - return field_type - else: - if field_type.get("type") == "array": - """ - if array is of complex type, recursively parse the - fields and create the native datatype - """ - return ( - "array<" - + self.delta_type_to_hive_type(field_type.get("elementType")) - + ">" - ) - elif field_type.get("type") == "struct": - parsed_struct = "" - for field in field_type.get("fields"): - 
""" - if field is of complex type, recursively parse - and create the native datatype - """ - parsed_struct += ( - "{0}:{1}".format( - field.get("name"), - self.delta_type_to_hive_type(field.get("type")), - ) - + "," - ) - return "struct<" + parsed_struct.rstrip(",") + ">" - return "" - def _parse_datatype(self, raw_field_json_str: str) -> List[SchemaFieldClass]: raw_field_json = json.loads(raw_field_json_str) # get the parent field name and type field_name = raw_field_json.get("name") - field_type = self.delta_type_to_hive_type(raw_field_json.get("type")) + field_type = delta_type_to_hive_type(raw_field_json.get("type")) return get_schema_fields_for_hive_column(field_name, field_type) @@ -343,8 +310,7 @@ def process_folder(self, path: str) -> Iterable[MetadataWorkUnit]: delta_table = read_delta_table(path, self.storage_options, self.source_config) if delta_table: logger.debug(f"Delta table found at: {path}") - for wu in self.ingest_table(delta_table, path.rstrip("/")): - yield wu + yield from self.ingest_table(delta_table, path.rstrip("/")) else: for folder in self.get_folders(path): yield from self.process_folder(folder) diff --git a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py index 3d85238d9422f..e3933b985c28a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dynamodb/dynamodb.py @@ -12,8 +12,6 @@ Union, ) -import boto3 -import pydantic from pydantic.fields import Field from datahub.configuration.common import AllowDenyPattern @@ -42,6 +40,7 @@ ClassificationSourceConfigMixin, classification_workunit_processor, ) +from datahub.ingestion.source.aws.aws_common import AwsSourceConfig from datahub.ingestion.source.dynamodb.data_reader import DynamoDBTableItemsReader from datahub.ingestion.source.schema_inference.object import SchemaDescription from 
datahub.ingestion.source.state.stale_entity_removal_handler import ( @@ -93,12 +92,8 @@ class DynamoDBConfig( DatasetSourceConfigMixin, StatefulIngestionConfigBase, ClassificationSourceConfigMixin, + AwsSourceConfig, ): - # TODO: refactor the config to use AwsConnectionConfig and create a method get_dynamodb_client - # in the class to provide optional region name input - aws_access_key_id: str = Field(description="AWS Access Key ID.") - aws_secret_access_key: pydantic.SecretStr = Field(description="AWS Secret Key.") - domain: Dict[str, AllowDenyPattern] = Field( default=dict(), description="regex patterns for tables to filter to assign domain_key. ", @@ -120,6 +115,10 @@ class DynamoDBConfig( # Custom Stateful Ingestion settings stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None + @property + def dynamodb_client(self): + return self.get_dynamodb_client() + @dataclass class DynamoDBSourceReport(StaleEntityRemovalSourceReport, ClassificationReportMixin): @@ -212,41 +211,27 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: ] def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: - # This is a offline call to get available region names from botocore library - session = boto3.Session() - dynamodb_regions = session.get_available_regions("dynamodb") - logger.info(f"region names {dynamodb_regions}") - - # traverse databases in sorted order so output is consistent - for region in dynamodb_regions: - logger.info(f"Processing region {region}") - # create a new dynamodb client for each region, - # it seems for one client we could only list the table of one specific region, - # the list_tables() method don't take any config that related to region - dynamodb_client = boto3.client( - "dynamodb", - region_name=region, - aws_access_key_id=self.config.aws_access_key_id, - aws_secret_access_key=self.config.aws_secret_access_key.get_secret_value(), - ) - data_reader = DynamoDBTableItemsReader.create(dynamodb_client) + 
dynamodb_client = self.config.dynamodb_client + region = dynamodb_client.meta.region_name - for table_name in self._list_tables(dynamodb_client): - dataset_name = f"{region}.{table_name}" - if not self.config.table_pattern.allowed(dataset_name): - logger.debug(f"skipping table: {dataset_name}") - self.report.report_dropped(dataset_name) - continue + data_reader = DynamoDBTableItemsReader.create(dynamodb_client) - table_wu_generator = self._process_table( - region, dynamodb_client, table_name, dataset_name - ) - yield from classification_workunit_processor( - table_wu_generator, - self.classification_handler, - data_reader, - [region, table_name], - ) + for table_name in self._list_tables(dynamodb_client): + dataset_name = f"{region}.{table_name}" + if not self.config.table_pattern.allowed(dataset_name): + logger.debug(f"skipping table: {dataset_name}") + self.report.report_dropped(dataset_name) + continue + + table_wu_generator = self._process_table( + region, dynamodb_client, table_name, dataset_name + ) + yield from classification_workunit_processor( + table_wu_generator, + self.classification_handler, + data_reader, + [region, table_name], + ) def _process_table( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/file.py b/metadata-ingestion/src/datahub/ingestion/source/file.py index 590aa59f7b5b6..49cc314426eb5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/file.py +++ b/metadata-ingestion/src/datahub/ingestion/source/file.py @@ -256,7 +256,7 @@ def _iterate_file(self, path: str) -> Iterable[Tuple[int, Any]]: file_read_mode = self.config.read_mode if file_read_mode == FileReadMode.BATCH: - with open(path, "r") as f: + with open(path) as f: parse_start_time = datetime.datetime.now() obj_list = json.load(f) parse_end_time = datetime.datetime.now() diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py index 91b0101c10451..c8ae779b602b8 
100644 --- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py +++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran.py @@ -66,7 +66,7 @@ class FivetranSource(StatefulIngestionSourceBase): platform: str = "fivetran" def __init__(self, config: FivetranSourceConfig, ctx: PipelineContext): - super(FivetranSource, self).__init__(config, ctx) + super().__init__(config, ctx) self.config = config self.report = FivetranSourceReport() diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py index 96a4ec44ae67b..a9eb59f929799 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_log_api.py @@ -76,29 +76,71 @@ def _initialize_fivetran_variables( ) def _query(self, query: str) -> List[Dict]: - logger.debug("Query : {}".format(query)) + logger.debug(f"Query : {query}") resp = self.engine.execute(query) return [row for row in resp] - def _get_table_lineage(self, connector_id: str) -> List[TableLineage]: + def _get_column_lineage_metadata(self) -> Dict[str, List]: + """ + Return's dict of column lineage metadata with key as '-' + """ + all_column_lineage: Dict[str, List] = {} + column_lineage_result = self._query( + self.fivetran_log_query.get_column_lineage_query() + ) + for column_lineage in column_lineage_result: + key = f"{column_lineage[Constant.SOURCE_TABLE_ID]}-{column_lineage[Constant.DESTINATION_TABLE_ID]}" + if key not in all_column_lineage: + all_column_lineage[key] = [column_lineage] + else: + all_column_lineage[key].append(column_lineage) + return all_column_lineage + + def _get_connectors_table_lineage_metadata(self) -> Dict[str, List]: + """ + Return's dict of table lineage metadata with key as 'CONNECTOR_ID' + """ + connectors_table_lineage_metadata: Dict[str, List] = {} table_lineage_result = self._query( - 
self.fivetran_log_query.get_table_lineage_query(connector_id=connector_id) + self.fivetran_log_query.get_table_lineage_query() ) + for table_lineage in table_lineage_result: + if ( + table_lineage[Constant.CONNECTOR_ID] + not in connectors_table_lineage_metadata + ): + connectors_table_lineage_metadata[ + table_lineage[Constant.CONNECTOR_ID] + ] = [table_lineage] + else: + connectors_table_lineage_metadata[ + table_lineage[Constant.CONNECTOR_ID] + ].append(table_lineage) + return connectors_table_lineage_metadata + + def _get_table_lineage( + self, + column_lineage_metadata: Dict[str, List], + table_lineage_result: Optional[List], + ) -> List[TableLineage]: table_lineage_list: List[TableLineage] = [] + if table_lineage_result is None: + return table_lineage_list for table_lineage in table_lineage_result: - column_lineage_result = self._query( - self.fivetran_log_query.get_column_lineage_query( - source_table_id=table_lineage[Constant.SOURCE_TABLE_ID], - destination_table_id=table_lineage[Constant.DESTINATION_TABLE_ID], - ) + column_lineage_result = column_lineage_metadata.get( + f"{table_lineage[Constant.SOURCE_TABLE_ID]}-{table_lineage[Constant.DESTINATION_TABLE_ID]}" ) - column_lineage_list: List[ColumnLineage] = [ - ColumnLineage( - source_column=column_lineage[Constant.SOURCE_COLUMN_NAME], - destination_column=column_lineage[Constant.DESTINATION_COLUMN_NAME], - ) - for column_lineage in column_lineage_result - ] + column_lineage_list: List[ColumnLineage] = [] + if column_lineage_result: + column_lineage_list = [ + ColumnLineage( + source_column=column_lineage[Constant.SOURCE_COLUMN_NAME], + destination_column=column_lineage[ + Constant.DESTINATION_COLUMN_NAME + ], + ) + for column_lineage in column_lineage_result + ] table_lineage_list.append( TableLineage( source_table=f"{table_lineage[Constant.SOURCE_SCHEMA_NAME]}.{table_lineage[Constant.SOURCE_TABLE_NAME]}", @@ -109,30 +151,44 @@ def _get_table_lineage(self, connector_id: str) -> List[TableLineage]: return 
table_lineage_list - def _get_jobs_list(self, connector_id: str) -> List[Job]: + def _get_all_connector_sync_logs(self) -> Dict[str, Dict]: + sync_logs = {} + for row in self._query(self.fivetran_log_query.get_sync_logs_query()): + if row[Constant.CONNECTOR_ID] not in sync_logs: + sync_logs[row[Constant.CONNECTOR_ID]] = { + row[Constant.SYNC_ID]: { + row["message_event"]: ( + row[Constant.TIME_STAMP].timestamp(), + row[Constant.MESSAGE_DATA], + ) + } + } + elif row[Constant.SYNC_ID] not in sync_logs[row[Constant.CONNECTOR_ID]]: + sync_logs[row[Constant.CONNECTOR_ID]][row[Constant.SYNC_ID]] = { + row["message_event"]: ( + row[Constant.TIME_STAMP].timestamp(), + row[Constant.MESSAGE_DATA], + ) + } + else: + sync_logs[row[Constant.CONNECTOR_ID]][row[Constant.SYNC_ID]][ + row["message_event"] + ] = (row[Constant.TIME_STAMP].timestamp(), row[Constant.MESSAGE_DATA]) + + return sync_logs + + def _get_jobs_list( + self, connector_sync_log: Optional[Dict[str, Dict]] + ) -> List[Job]: jobs: List[Job] = [] - sync_start_logs = { - row[Constant.SYNC_ID]: row - for row in self._query( - self.fivetran_log_query.get_sync_start_logs_query( - connector_id=connector_id - ) - ) - } - sync_end_logs = { - row[Constant.SYNC_ID]: row - for row in self._query( - self.fivetran_log_query.get_sync_end_logs_query( - connector_id=connector_id - ) - ) - } - for sync_id in sync_start_logs.keys(): - if sync_end_logs.get(sync_id) is None: - # If no sync-end event log for this sync id that means sync is still in progress + if connector_sync_log is None: + return jobs + for sync_id in connector_sync_log.keys(): + if len(connector_sync_log[sync_id]) != 2: + # If both sync-start and sync-end event log not present for this sync that means sync is still in progress continue - message_data = sync_end_logs[sync_id][Constant.MESSAGE_DATA] + message_data = connector_sync_log[sync_id]["sync_end"][1] if message_data is None: continue message_data = json.loads(message_data) @@ -145,12 +201,8 @@ def 
_get_jobs_list(self, connector_id: str) -> List[Job]: jobs.append( Job( job_id=sync_id, - start_time=round( - sync_start_logs[sync_id][Constant.TIME_STAMP].timestamp() - ), - end_time=round( - sync_end_logs[sync_id][Constant.TIME_STAMP].timestamp() - ), + start_time=round(connector_sync_log[sync_id]["sync_start"][0]), + end_time=round(connector_sync_log[sync_id]["sync_end"][0]), status=message_data[Constant.STATUS], ) ) @@ -172,6 +224,9 @@ def get_allowed_connectors_list( self, connector_patterns: AllowDenyPattern, report: FivetranSourceReport ) -> List[Connector]: connectors: List[Connector] = [] + sync_logs = self._get_all_connector_sync_logs() + table_lineage_metadata = self._get_connectors_table_lineage_metadata() + column_lineage_metadata = self._get_column_lineage_metadata() connector_list = self._query(self.fivetran_log_query.get_connectors_query()) for connector in connector_list: if not connector_patterns.allowed(connector[Constant.CONNECTOR_NAME]): @@ -189,9 +244,14 @@ def get_allowed_connectors_list( connector[Constant.CONNECTING_USER_ID] ), table_lineage=self._get_table_lineage( - connector[Constant.CONNECTOR_ID] + column_lineage_metadata=column_lineage_metadata, + table_lineage_result=table_lineage_metadata.get( + connector[Constant.CONNECTOR_ID] + ), + ), + jobs=self._get_jobs_list( + sync_logs.get(connector[Constant.CONNECTOR_ID]) ), - jobs=self._get_jobs_list(connector[Constant.CONNECTOR_ID]), ) ) return connectors diff --git a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py index f1c818150c18f..8f621bc3ffd06 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/fivetran/fivetran_query.py @@ -30,26 +30,20 @@ def get_user_query(self, user_id: str) -> str: FROM {self.db_clause}user WHERE id = '{user_id}'""" - def get_sync_start_logs_query(self, connector_id: str) 
-> str: + def get_sync_logs_query(self) -> str: return f""" - SELECT time_stamp, - sync_id - FROM {self.db_clause}log - WHERE message_event = 'sync_start' - and connector_id = '{connector_id}' order by time_stamp""" - - def get_sync_end_logs_query(self, connector_id: str) -> str: - return f""" - SELECT time_stamp, + SELECT connector_id, sync_id, - message_data + message_event, + message_data, + time_stamp FROM {self.db_clause}log - WHERE message_event = 'sync_end' - and connector_id = '{connector_id}' order by time_stamp""" + WHERE message_event in ('sync_start', 'sync_end')""" - def get_table_lineage_query(self, connector_id: str) -> str: + def get_table_lineage_query(self) -> str: return f""" - SELECT stm.id as source_table_id, + SELECT stm.connector_id as connector_id, + stm.id as source_table_id, stm.name as source_table_name, ssm.name as source_schema_name, dtm.id as destination_table_id, @@ -59,17 +53,16 @@ def get_table_lineage_query(self, connector_id: str) -> str: JOIN {self.db_clause}source_table_metadata as stm on tl.source_table_id = stm.id JOIN {self.db_clause}destination_table_metadata as dtm on tl.destination_table_id = dtm.id JOIN {self.db_clause}source_schema_metadata as ssm on stm.schema_id = ssm.id - JOIN {self.db_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id - WHERE stm.connector_id = '{connector_id}'""" + JOIN {self.db_clause}destination_schema_metadata as dsm on dtm.schema_id = dsm.id""" - def get_column_lineage_query( - self, source_table_id: str, destination_table_id: str - ) -> str: + def get_column_lineage_query(self) -> str: return f""" - SELECT scm.name as source_column_name, + SELECT scm.table_id as source_table_id, + dcm.table_id as destination_table_id, + scm.name as source_column_name, dcm.name as destination_column_name FROM {self.db_clause}column_lineage as cl - JOIN {self.db_clause}source_column_metadata as scm on - (cl.source_column_id = scm.id and scm.table_id = {source_table_id}) - JOIN 
{self.db_clause}destination_column_metadata as dcm on - (cl.destination_column_id = dcm.id and dcm.table_id = {destination_table_id})""" + JOIN {self.db_clause}source_column_metadata as scm + on cl.source_column_id = scm.id + JOIN {self.db_clause}destination_column_metadata as dcm + on cl.destination_column_id = dcm.id""" diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py index 7e3ff7d4fb84c..2bd05ca11e234 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py @@ -263,7 +263,7 @@ def create(cls, config_dict, ctx): return cls(config, ctx) def __init__(self, config: AzureADConfig, ctx: PipelineContext): - super(AzureADSource, self).__init__(config, ctx) + super().__init__(config, ctx) self.config = config self.report = AzureADSourceReport( filtered_tracking=self.config.filtered_tracking @@ -488,7 +488,7 @@ def _get_azure_ad_group_members(self, azure_ad_group: dict) -> Iterable[List]: yield from self._get_azure_ad_data(kind=kind) def _get_azure_ad_data(self, kind: str) -> Iterable[List]: - headers = {"Authorization": "Bearer {}".format(self.token)} + headers = {"Authorization": f"Bearer {self.token}"} # 'ConsistencyLevel': 'eventual'} url = self.config.graph_url + kind while True: diff --git a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py index 5c1edce7da6c9..49b6422902299 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py +++ b/metadata-ingestion/src/datahub/ingestion/source/identity/okta.py @@ -289,7 +289,7 @@ def create(cls, config_dict, ctx): return cls(config, ctx) def __init__(self, config: OktaConfig, ctx: PipelineContext): - super(OktaSource, self).__init__(config, ctx) + super().__init__(config, ctx) self.config = config self.report = 
OktaSourceReport() self.okta_client = self._create_okta_client() @@ -465,8 +465,7 @@ def _get_okta_groups( "okta_groups", f"Failed to fetch Groups from Okta API: {err}" ) if groups: - for group in groups: - yield group + yield from groups if resp and resp.has_next(): sleep(self.config.delay_seconds) try: @@ -504,8 +503,7 @@ def _get_okta_group_users( f"Failed to fetch Users of Group {group.profile.name} from Okta API: {err}", ) if users: - for user in users: - yield user + yield from users if resp and resp.has_next(): sleep(self.config.delay_seconds) try: @@ -542,8 +540,7 @@ def _get_okta_users(self, event_loop: asyncio.AbstractEventLoop) -> Iterable[Use "okta_users", f"Failed to fetch Users from Okta API: {err}" ) if users: - for user in users: - yield user + yield from users if resp and resp.has_next(): sleep(self.config.delay_seconds) try: diff --git a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py index 1a1e012e80633..cf70eb95762c4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py +++ b/metadata-ingestion/src/datahub/ingestion/source/kafka_connect.py @@ -263,12 +263,12 @@ def __init__( KNOWN_NONTOPICROUTING_TRANSFORMS = ( KAFKA_NONTOPICROUTING_TRANSFORMS + [ - "org.apache.kafka.connect.transforms.{}".format(t) + f"org.apache.kafka.connect.transforms.{t}" for t in KAFKA_NONTOPICROUTING_TRANSFORMS ] + CONFLUENT_NONTOPICROUTING_TRANSFORMS + [ - "io.confluent.connect.transforms.{}".format(t) + f"io.confluent.connect.transforms.{t}" for t in CONFLUENT_NONTOPICROUTING_TRANSFORMS ] ) @@ -314,9 +314,9 @@ def get_parser( transform = {"name": name} transforms.append(transform) for key in self.connector_manifest.config.keys(): - if key.startswith("transforms.{}.".format(name)): + if key.startswith(f"transforms.{name}."): transform[ - key.replace("transforms.{}.".format(name), "") + key.replace(f"transforms.{name}.", "") ] = self.connector_manifest.config[key] 
return self.JdbcParser( @@ -729,7 +729,7 @@ def _extract_lineages(self): source_platform = parser.source_platform server_name = parser.server_name database_name = parser.database_name - topic_naming_pattern = r"({0})\.(\w+\.\w+)".format(server_name) + topic_naming_pattern = rf"({server_name})\.(\w+\.\w+)" if not self.connector_manifest.topic_names: return lineages @@ -1089,7 +1089,7 @@ def transform_connector_config( for k, v in connector_config.items(): for key, value in lookupsByProvider.items(): if key in v: - connector_config[k] = v.replace(key, value) + connector_config[k] = connector_config[k].replace(key, value) @platform_name("Kafka Connect") diff --git a/metadata-ingestion/src/datahub/ingestion/source/ldap.py b/metadata-ingestion/src/datahub/ingestion/source/ldap.py index 72985688273f6..1368a5b83fe6f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ldap.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ldap.py @@ -205,7 +205,7 @@ class LDAPSource(StatefulIngestionSourceBase): def __init__(self, ctx: PipelineContext, config: LDAPSourceConfig): """Constructor.""" - super(LDAPSource, self).__init__(config, ctx) + super().__init__(config, ctx) self.config = config # ensure prior defaults are in place diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lkml_patched.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lkml_patched.py index 6506682b8ed8d..a44d7e5215c35 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lkml_patched.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lkml_patched.py @@ -24,5 +24,5 @@ def load_lkml(path: Union[str, pathlib.Path]) -> dict: # Using this method instead of lkml.load directly ensures # that our patches to lkml are applied. 
- with open(path, "r") as file: + with open(path) as file: return lkml.load(file) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py index 90ba7aeb8f87a..b6cc97b2e5fda 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py @@ -245,6 +245,56 @@ class ViewField: upstream_fields: List[str] = dataclasses_field(default_factory=list) +@dataclass +class ExploreUpstreamViewField: + explore: LookmlModelExplore + field: LookmlModelExploreField + + def _form_field_name(self): + assert self.field.name is not None + + if len(self.field.name.split(".")) != 2: + return self.field.name # Inconsistent info received + + view_name: Optional[str] = self.explore.name + + if ( + self.field.original_view is not None + ): # if `from` is used in explore then original_view is pointing to + # lookml view + view_name = self.field.original_view + + field_name = self.field.name.split(".")[1] + + return f"{view_name}.{field_name}" + + def upstream(self) -> str: + assert self.field.name is not None + + if self.field.dimension_group is None: # It is not part of Dimensional Group + return self._form_field_name() + + if self.field.field_group_variant is None: + return ( + self._form_field_name() + ) # Variant i.e. Month, Day, Year ... 
is not available + + if self.field.type is None or not self.field.type.startswith("date_"): + return ( + self._form_field_name() + ) # for Dimensional Group the type is always start with date_[time|date] + + if not self.field.name.endswith(f"_{self.field.field_group_variant.lower()}"): + return ( + self._form_field_name() + ) # if the explore field is generated because of Dimensional Group in View + # then the field_name should ends with field_group_variant + + return self._form_field_name()[ + : -(len(self.field.field_group_variant.lower()) + 1) + ] # remove variant at the end. +1 for "_" + + def create_view_project_map(view_fields: List[ViewField]) -> Dict[str, str]: """ Each view in a model has unique name. @@ -793,6 +843,13 @@ def from_api( # noqa: C901 if dim_field.name is None: continue else: + dimension_upstream_field: ExploreUpstreamViewField = ( + ExploreUpstreamViewField( + explore=explore, + field=dim_field, + ) + ) + view_fields.append( ViewField( name=dim_field.name, @@ -823,7 +880,9 @@ def from_api( # noqa: C901 if dim_field.primary_key else False ), - upstream_fields=[dim_field.name], + upstream_fields=[ + dimension_upstream_field.upstream() + ], ) ) if explore.fields.measures is not None: @@ -831,6 +890,13 @@ def from_api( # noqa: C901 if measure_field.name is None: continue else: + measure_upstream_field: ExploreUpstreamViewField = ( + ExploreUpstreamViewField( + explore=explore, + field=measure_field, + ) + ) + view_fields.append( ViewField( name=measure_field.name, @@ -857,7 +923,7 @@ def from_api( # noqa: C901 if measure_field.primary_key else False ), - upstream_fields=[measure_field.name], + upstream_fields=[measure_upstream_field.upstream()], ) ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py index ec4d8b78b0d06..8de213cfabaf0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_config.py @@ -1,11 +1,10 @@ import dataclasses import os import re -from typing import Any, Dict, List, Optional, Union, cast +from typing import Any, ClassVar, Dict, List, Optional, Union, cast import pydantic from pydantic import Field, validator -from typing_extensions import ClassVar from datahub.configuration import ConfigModel from datahub.configuration.common import AllowDenyPattern diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index dfa374fe0d779..c4ba3146031af 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -644,9 +644,7 @@ def _make_chart_metadata_events( customProperties={ "upstream_fields": ( ",".join( - sorted( - set(field.name for field in dashboard_element.input_fields) - ) + sorted({field.name for field in dashboard_element.input_fields}) ) if dashboard_element.input_fields else "" @@ -969,8 +967,7 @@ def _make_dashboard_and_chart_mces( dashboard_events = self._make_dashboard_metadata_events( looker_dashboard, list(chart_urns) ) - for dashboard_event in dashboard_events: - yield dashboard_event + yield from dashboard_events def get_ownership( self, looker_dashboard: LookerDashboard diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py index e119e88a24bd7..c97025d75229b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py @@ -273,7 +273,7 @@ def _fill_user_stat_aspect( logger.debug("Entering fill user stat aspect") # We first resolve all the users using a threadpool to warm up the cache - user_ids = set([self._get_user_identifier(row) for row in 
user_wise_rows]) + user_ids = {self._get_user_identifier(row) for row in user_wise_rows} start_time = datetime.datetime.now() with concurrent.futures.ThreadPoolExecutor( max_workers=self.config.max_threads @@ -507,7 +507,7 @@ def append_user_stat( user_urn: Optional[str] = user.get_urn(self.config.strip_user_ids_from_email) if user_urn is None: - logger.warning("user_urn not found for the user {}".format(user)) + logger.warning(f"user_urn not found for the user {user}") return dashboard_stat_aspect.userCounts.append( @@ -614,7 +614,7 @@ def append_user_stat( user_urn: Optional[str] = user.get_urn(self.config.strip_user_ids_from_email) if user_urn is None: - logger.warning("user_urn not found for the user {}".format(user)) + logger.warning(f"user_urn not found for the user {user}") return chart_stat_aspect.userCounts.append( diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 0a646f8f7d824..4a872f8b1a025 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -112,6 +112,34 @@ _MODEL_FILE_EXTENSION = ".model.lkml" +def deduplicate_fields(fields: List[ViewField]) -> List[ViewField]: + # Remove duplicates filed from self.fields + # Logic is: If more than a field has same ViewField.name then keep only one filed where ViewField.field_type + # is DIMENSION_GROUP. + # Looker Constraint: + # - Any field declared as dimension or measure can be redefined as dimension_group. + # - Any field declared in dimension can't be redefined in measure and vice-versa. 
+ + dimension_group_field_names: List[str] = [ + field.name + for field in fields + if field.field_type == ViewFieldType.DIMENSION_GROUP + ] + + new_fields: List[ViewField] = [] + + for field in fields: + if ( + field.name in dimension_group_field_names + and field.field_type != ViewFieldType.DIMENSION_GROUP + ): + continue + + new_fields.append(field) + + return new_fields + + def _get_bigquery_definition( looker_connection: DBConnection, ) -> Tuple[str, Optional[str], Optional[str]]: @@ -642,7 +670,7 @@ def _load_viewfile( return self.viewfile_cache[path] try: - with open(path, "r") as file: + with open(path) as file: raw_file_content = file.read() except Exception as e: self.reporter.report_failure(path, f"failed to load view file: {e}") @@ -1032,10 +1060,11 @@ def _get_fields( if "sql" in field_dict and populate_sql_logic_in_descriptions else "" ) + description = field_dict.get("description", default_description) label = field_dict.get("label", "") upstream_fields = [] - if type_cls == ViewFieldType.DIMENSION and extract_column_level_lineage: + if extract_column_level_lineage: if field_dict.get("sql") is not None: for upstream_field_match in re.finditer( r"\${TABLE}\.[\"]*([\.\w]+)", field_dict["sql"] @@ -1154,6 +1183,8 @@ def from_looker_dict( ) fields: List[ViewField] = dimensions + dimension_groups + measures + fields = deduplicate_fields(fields) + # Prep "default" values for the view, which will be overridden by the logic below. 
view_logic = looker_viewfile.raw_file_content[:max_file_snippet_length] sql_table_names: List[str] = [] diff --git a/metadata-ingestion/src/datahub/ingestion/source/metabase.py b/metadata-ingestion/src/datahub/ingestion/source/metabase.py index fc60dc6406730..6f8f5097b6149 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/metabase.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metabase.py @@ -1,5 +1,6 @@ import json import logging +from dataclasses import dataclass from datetime import datetime, timezone from functools import lru_cache from typing import Dict, Iterable, List, Optional, Tuple, Union @@ -21,8 +22,17 @@ platform_name, support_status, ) -from datahub.ingestion.api.source import Source, SourceReport +from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.source.state.stale_entity_removal_handler import ( + StaleEntityRemovalHandler, + StaleEntityRemovalSourceReport, + StatefulStaleMetadataRemovalConfig, +) +from datahub.ingestion.source.state.stateful_ingestion_base import ( + StatefulIngestionConfigBase, + StatefulIngestionSourceBase, +) from datahub.metadata.com.linkedin.pegasus2avro.common import ( AuditStamp, ChangeAuditStamps, @@ -50,7 +60,7 @@ DATASOURCE_URN_RECURSION_LIMIT = 5 -class MetabaseConfig(DatasetLineageProviderConfigBase): +class MetabaseConfig(DatasetLineageProviderConfigBase, StatefulIngestionConfigBase): # See the Metabase /api/session endpoint for details # https://www.metabase.com/docs/latest/api-documentation.html#post-apisession connect_uri: str = Field(default="localhost:3000", description="Metabase host URL.") @@ -84,6 +94,7 @@ class MetabaseConfig(DatasetLineageProviderConfigBase): default=False, description="Flag that if true, exclude other user collections", ) + stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None @validator("connect_uri", "display_uri") def 
remove_trailing_slash(cls, v): @@ -97,12 +108,17 @@ def default_display_uri_to_connect_uri(cls, values): return values +@dataclass +class MetabaseReport(StaleEntityRemovalSourceReport): + pass + + @platform_name("Metabase") @config_class(MetabaseConfig) @support_status(SupportStatus.CERTIFIED) @capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default") @capability(SourceCapability.LINEAGE_COARSE, "Supported by default") -class MetabaseSource(Source): +class MetabaseSource(StatefulIngestionSourceBase): """ This plugin extracts Charts, dashboards, and associated metadata. This plugin is in beta and has only been tested on PostgreSQL and H2 database. @@ -147,17 +163,18 @@ class MetabaseSource(Source): """ config: MetabaseConfig - report: SourceReport + report: MetabaseReport platform = "metabase" def __hash__(self): return id(self) def __init__(self, ctx: PipelineContext, config: MetabaseConfig): - super().__init__(ctx) + super().__init__(config, ctx) self.config = config - self.report = SourceReport() + self.report = MetabaseReport() self.setup_session() + self.source_config: MetabaseConfig = config def setup_session(self) -> None: login_response = requests.post( @@ -739,6 +756,14 @@ def create(cls, config_dict: dict, ctx: PipelineContext) -> Source: config = MetabaseConfig.parse_obj(config_dict) return cls(ctx, config) + def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: + return [ + *super().get_workunit_processors(), + StaleEntityRemovalHandler.create( + self, self.source_config, self.ctx + ).workunit_processor, + ] + def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield from self.emit_card_mces() yield from self.emit_dashboard_mces() diff --git a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py index 0edc8d9752983..d3c4e2e3cd80e 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py +++ b/metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py @@ -520,12 +520,11 @@ def get_workunits_internal( materialize_all_node_urns(glossary_config, self.config.enable_auto_id) path_vs_id = populate_path_vs_id(glossary_config) - for event in auto_workunit( + yield from auto_workunit( get_mces( glossary_config, path_vs_id, ingestion_config=self.config, ctx=self.ctx ) - ): - yield event + ) def get_report(self): return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 8dcdc5eeef404..7ef9ba051151d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -1,3 +1,4 @@ +import dataclasses import logging import re import time @@ -9,16 +10,20 @@ import dateutil.parser as dp import pydantic import requests +import sqlglot import tenacity +import yaml +from liquid import Template, Undefined from pydantic import Field, validator from requests.models import HTTPBasicAuth, HTTPError from sqllineage.runner import LineageRunner from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential import datahub.emitter.mce_builder as builder -from datahub.configuration.common import ConfigModel +from datahub.configuration.common import AllowDenyPattern, ConfigModel from datahub.configuration.source_common import DatasetLineageProviderConfigBase from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.emitter.mcp_builder import ContainerKey, gen_containers from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -30,6 +35,10 @@ ) from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport from datahub.ingestion.api.workunit import MetadataWorkUnit +from 
datahub.ingestion.source.common.subtypes import ( + BIAssetSubTypes, + BIContainerSubTypes, +) from datahub.ingestion.source.state.stale_entity_removal_handler import ( StaleEntityRemovalHandler, StaleEntityRemovalSourceReport, @@ -49,8 +58,9 @@ ) from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent from datahub.metadata.schema_classes import ( + BrowsePathEntryClass, BrowsePathsClass, - ChangeTypeClass, + BrowsePathsV2Class, ChartInfoClass, ChartQueryClass, ChartQueryTypeClass, @@ -77,9 +87,7 @@ QuerySourceClass, QueryStatementClass, SchemaFieldClass, - SchemaFieldDataTypeClass, SchemaMetadataClass, - StringTypeClass, SubTypesClass, TagAssociationClass, TagPropertiesClass, @@ -94,10 +102,16 @@ infer_output_schema, ) from datahub.utilities import config_clean +from datahub.utilities.lossy_collections import LossyDict, LossyList logger: logging.Logger = logging.getLogger(__name__) +class SpaceKey(ContainerKey): + # Note that Mode has renamed Spaces to Collections. + space_token: str + + class ModeAPIConfig(ConfigModel): retry_backoff_multiplier: Union[int, float] = Field( default=2, @@ -121,11 +135,22 @@ class ModeConfig(StatefulIngestionConfigBase, DatasetLineageProviderConfigBase): password: pydantic.SecretStr = Field( description="Mode password for authentication." ) - workspace: Optional[str] = Field(default=None, description="") + + workspace: str = Field( + description="The Mode workspace name. Find it in Settings > Workspace > Details." + ) default_schema: str = Field( default="public", description="Default schema to use when schema is not provided in an SQL query", ) + + space_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern( + deny=["^Personal$"], + ), + description="Regex patterns for mode spaces to filter in ingestion (Spaces named as 'Personal' are filtered by default.) Specify regex to only match the space name. e.g. 
to only ingest space named analytics, use the regex 'analytics'", + ) + owner_username_instead_of_email: Optional[bool] = Field( default=True, description="Use username for owner URN instead of Email" ) @@ -155,7 +180,22 @@ class HTTPError429(HTTPError): @dataclass class ModeSourceReport(StaleEntityRemovalSourceReport): - pass + filtered_spaces: LossyList[str] = dataclasses.field(default_factory=LossyList) + num_sql_parsed: int = 0 + num_sql_parser_failures: int = 0 + num_sql_parser_success: int = 0 + num_sql_parser_table_error: int = 0 + num_sql_parser_column_error: int = 0 + num_query_template_render: int = 0 + num_query_template_render_failures: int = 0 + num_query_template_render_success: int = 0 + + dropped_imported_datasets: LossyDict[str, LossyList[str]] = dataclasses.field( + default_factory=LossyDict + ) + + def report_dropped_space(self, ent_name: str) -> None: + self.filtered_spaces.append(ent_name) @platform_name("Mode") @@ -268,21 +308,78 @@ def __init__(self, ctx: PipelineContext, config: ModeConfig): except HTTPError as http_error: self.report.report_failure( key="mode-session", - reason=f"Unable to retrieve user " - f"{self.config.token} information, " - f"{str(http_error)}", + reason=f"Unable to verify connection. Error was: {str(http_error)}", ) self.workspace_uri = f"{self.config.connect_uri}/api/{self.config.workspace}" self.space_tokens = self._get_space_name_and_tokens() + def _browse_path_space(self) -> List[BrowsePathEntryClass]: + # TODO: Use containers for the workspace? 
+ return [ + BrowsePathEntryClass(id=self.config.workspace), + ] + + def _browse_path_dashboard(self, space_token: str) -> List[BrowsePathEntryClass]: + space_container_urn = self.gen_space_key(space_token).as_urn() + return [ + *self._browse_path_space(), + BrowsePathEntryClass(id=space_container_urn, urn=space_container_urn), + ] + + def _browse_path_query( + self, space_token: str, report_info: dict + ) -> List[BrowsePathEntryClass]: + dashboard_urn = self._dashboard_urn(report_info) + return [ + *self._browse_path_dashboard(space_token), + BrowsePathEntryClass(id=dashboard_urn, urn=dashboard_urn), + ] + + def _browse_path_chart( + self, space_token: str, report_info: dict, query_info: dict + ) -> List[BrowsePathEntryClass]: + query_urn = self.get_dataset_urn_from_query(query_info) + return [ + *self._browse_path_query(space_token, report_info), + BrowsePathEntryClass(id=query_urn, urn=query_urn), + ] + + def _dashboard_urn(self, report_info: dict) -> str: + return builder.make_dashboard_urn(self.platform, report_info.get("id", "")) + + def _parse_last_run_at(self, report_info: dict) -> Optional[int]: + # Mode queries are refreshed, and that timestamp is reflected correctly here. + # However, datasets are synced, and that's captured by the sync timestamps. + # However, this is probably accurate enough for now. 
+ last_refreshed_ts = None + last_refreshed_ts_str = report_info.get("last_run_at") + if last_refreshed_ts_str: + last_refreshed_ts = int(dp.parse(last_refreshed_ts_str).timestamp() * 1000) + + return last_refreshed_ts + def construct_dashboard( - self, space_name: str, report_info: dict - ) -> DashboardSnapshot: + self, space_token: str, report_info: dict + ) -> Optional[Tuple[DashboardSnapshot, MetadataChangeProposalWrapper]]: report_token = report_info.get("token", "") - dashboard_urn = builder.make_dashboard_urn( - self.platform, report_info.get("id", "") - ) + # logger.debug(f"Processing report {report_info.get('name', '')}: {report_info}") + + if not report_token: + self.report.report_warning( + key="mode-report", + reason=f"Report token is missing for {report_info.get('id', '')}", + ) + return None + + if not report_info.get("id"): + self.report.report_warning( + key="mode-report", + reason=f"Report id is missing for {report_info.get('token', '')}", + ) + return None + + dashboard_urn = self._dashboard_urn(report_info) dashboard_snapshot = DashboardSnapshot( urn=dashboard_urn, aspects=[], @@ -290,50 +387,64 @@ def construct_dashboard( title = report_info.get("name", "") description = report_info.get("description", "") - last_modified = ChangeAuditStamps() + + # Creator + created ts. creator = self._get_creator( report_info.get("_links", {}).get("creator", {}).get("href", "") ) if creator: - modified_actor = builder.make_user_urn(creator) - if not report_info.get("last_saved_at"): - # Sometimes mode returns null for last_saved_at. - # In that case, we use the created_at timestamp instead. 
- report_info["last_saved_at"] = report_info.get("created_at") - - modified_ts = int( - dp.parse(f"{report_info.get('last_saved_at', 'now')}").timestamp() - * 1000 - ) + creator_actor = builder.make_user_urn(creator) created_ts = int( dp.parse(f"{report_info.get('created_at', 'now')}").timestamp() * 1000 ) - last_modified = ChangeAuditStamps( - created=AuditStamp(time=created_ts, actor=modified_actor), - lastModified=AuditStamp(time=modified_ts, actor=modified_actor), + last_modified.created = AuditStamp(time=created_ts, actor=creator_actor) + + # Last modified ts. + last_modified_ts_str = report_info.get("last_saved_at") + if not last_modified_ts_str: + # Sometimes mode returns null for last_saved_at. + # In that case, we use the edited_at timestamp instead. + last_modified_ts_str = report_info.get("edited_at") + if last_modified_ts_str: + modified_ts = int(dp.parse(last_modified_ts_str).timestamp() * 1000) + last_modified.lastModified = AuditStamp( + time=modified_ts, actor="urn:li:corpuser:unknown" ) + # Last refreshed ts. 
+ last_refreshed_ts = self._parse_last_run_at(report_info) + dashboard_info_class = DashboardInfoClass( - description=description, - title=title, + description=description if description else "", + title=title if title else "", charts=self._get_chart_urns(report_token), lastModified=last_modified, + lastRefreshed=last_refreshed_ts, dashboardUrl=f"{self.config.connect_uri}/{self.config.workspace}/reports/{report_token}", customProperties={}, ) dashboard_snapshot.aspects.append(dashboard_info_class) # browse path + space_name = self.space_tokens[space_token] browse_path = BrowsePathsClass( paths=[ f"/mode/{self.config.workspace}/" f"{space_name}/" - f"{report_info.get('name')}" + f"{title if title else report_info.get('id', '')}" ] ) dashboard_snapshot.aspects.append(browse_path) + browse_path_v2 = BrowsePathsV2Class( + path=self._browse_path_dashboard(space_token) + ) + browse_mcp = MetadataChangeProposalWrapper( + entityUrn=dashboard_urn, + aspect=browse_path_v2, + ) + # Ownership ownership = self._get_ownership( self._get_creator( @@ -343,7 +454,7 @@ def construct_dashboard( if ownership is not None: dashboard_snapshot.aspects.append(ownership) - return dashboard_snapshot + return dashboard_snapshot, browse_mcp @lru_cache(maxsize=None) def _get_ownership(self, user: str) -> Optional[OwnershipClass]: @@ -372,7 +483,7 @@ def _get_creator(self, href: str) -> Optional[str]: else user_json.get("email") ) except HTTPError as http_error: - self.report.report_failure( + self.report.report_warning( key="mode-user", reason=f"Unable to retrieve user for {href}, " f"Reason: {str(http_error)}", @@ -386,6 +497,7 @@ def _get_chart_urns(self, report_token: str) -> list: charts = self._get_charts(report_token, query.get("token", "")) # build chart urns for chart in charts: + logger.debug(f"Chart: {chart.get('token')}") chart_urn = builder.make_chart_urn( self.platform, chart.get("token", "") ) @@ -396,10 +508,19 @@ def _get_chart_urns(self, report_token: str) -> list: def 
_get_space_name_and_tokens(self) -> dict: space_info = {} try: - payload = self._get_request_json(f"{self.workspace_uri}/spaces") + logger.debug(f"Retrieving spaces for {self.workspace_uri}") + payload = self._get_request_json(f"{self.workspace_uri}/spaces?filter=all") spaces = payload.get("_embedded", {}).get("spaces", {}) - + logger.debug( + f"Got {len(spaces)} spaces from workspace {self.workspace_uri}" + ) for s in spaces: + logger.debug(f"Space: {s.get('name')}") + space_name = s.get("name", "") + if not self.config.space_pattern.allowed(space_name): + self.report.report_dropped_space(space_name) + logging.debug(f"Skipping space {space_name} due to space pattern") + continue space_info[s.get("token", "")] = s.get("name", "") except HTTPError as http_error: self.report.report_failure( @@ -414,6 +535,7 @@ def _get_chart_type(self, token: str, display_type: str) -> Optional[str]: type_mapping = { "table": ChartTypeClass.TABLE, "bar": ChartTypeClass.BAR, + "bigNumber": ChartTypeClass.TEXT, "line": ChartTypeClass.LINE, "stackedBar100": ChartTypeClass.BAR, "stackedBar": ChartTypeClass.BAR, @@ -428,19 +550,22 @@ def _get_chart_type(self, token: str, display_type: str) -> Optional[str]: "bigValue": ChartTypeClass.TEXT, "pivotTable": ChartTypeClass.TABLE, "linePlusBar": None, + "vegas": None, + "vegasPivotTable": ChartTypeClass.TABLE, + "histogram": ChartTypeClass.HISTOGRAM, } if not display_type: self.report.report_warning( - key=f"mode-chart-{token}", - reason=f"Chart type {display_type} is missing. " f"Setting to None", + key="mode-chart-type-mapper", + reason=f"{token}: Chart type is missing. Setting to None", ) return None try: chart_type = type_mapping[display_type] except KeyError: self.report.report_warning( - key=f"mode-chart-{token}", - reason=f"Chart type {display_type} not supported. " f"Setting to None", + key="mode-chart-type-mapper", + reason=f"{token}: Chart type {display_type} not supported. 
Setting to None", ) chart_type = None @@ -449,7 +574,9 @@ def _get_chart_type(self, token: str, display_type: str) -> Optional[str]: def construct_chart_custom_properties( self, chart_detail: dict, chart_type: str ) -> Dict: - custom_properties = {} + custom_properties = { + "ChartType": chart_type, + } metadata = chart_detail.get("encoding", {}) if chart_type == "table": columns = list(chart_detail.get("fieldFormats", {}).keys()) @@ -457,10 +584,12 @@ def construct_chart_custom_properties( filters = metadata.get("filter", []) filters = filters[0].get("formula", "") if len(filters) else "" - custom_properties = { - "Columns": str_columns, - "Filters": filters[1:-1] if len(filters) else "", - } + custom_properties.update( + { + "Columns": str_columns, + "Filters": filters[1:-1] if len(filters) else "", + } + ) elif chart_type == "pivotTable": pivot_table = chart_detail.get("pivotTable", {}) @@ -469,12 +598,14 @@ def construct_chart_custom_properties( values = pivot_table.get("values", []) filters = pivot_table.get("filters", []) - custom_properties = { - "Columns": ", ".join(columns) if len(columns) else "", - "Rows": ", ".join(rows) if len(rows) else "", - "Metrics": ", ".join(values) if len(values) else "", - "Filters": ", ".join(filters) if len(filters) else "", - } + custom_properties.update( + { + "Columns": ", ".join(columns) if len(columns) else "", + "Rows": ", ".join(rows) if len(rows) else "", + "Metrics": ", ".join(values) if len(values) else "", + "Filters": ", ".join(filters) if len(filters) else "", + } + ) # list filters in their own row for filter in filters: custom_properties[f"Filter: {filter}"] = ", ".join( @@ -489,14 +620,16 @@ def construct_chart_custom_properties( value = metadata.get("value", []) filters = metadata.get("filter", []) - custom_properties = { - "X": x[0].get("formula", "") if len(x) else "", - "Y": y[0].get("formula", "") if len(y) else "", - "X2": x2[0].get("formula", "") if len(x2) else "", - "Y2": y2[0].get("formula", "") if 
len(y2) else "", - "Metrics": value[0].get("formula", "") if len(value) else "", - "Filters": filters[0].get("formula", "") if len(filters) else "", - } + custom_properties.update( + { + "X": x[0].get("formula", "") if len(x) else "", + "Y": y[0].get("formula", "") if len(y) else "", + "X2": x2[0].get("formula", "") if len(x2) else "", + "Y2": y2[0].get("formula", "") if len(y2) else "", + "Metrics": value[0].get("formula", "") if len(value) else "", + "Filters": filters[0].get("formula", "") if len(filters) else "", + } + ) return custom_properties @@ -532,21 +665,25 @@ def _get_datahub_friendly_platform(self, adapter, platform): return platform @lru_cache(maxsize=None) - def _get_platform_and_dbname( - self, data_source_id: int - ) -> Union[Tuple[str, str], Tuple[None, None]]: + def _get_data_sources(self) -> List[dict]: data_sources = [] try: ds_json = self._get_request_json(f"{self.workspace_uri}/data_sources") data_sources = ds_json.get("_embedded", {}).get("data_sources", []) except HTTPError as http_error: self.report.report_failure( - key=f"mode-datasource-{data_source_id}", - reason=f"No data sources found for datasource id: " - f"{data_source_id}, " - f"Reason: {str(http_error)}", + key="mode-data-sources", + reason=f"Unable to retrieve data sources. 
Reason: {str(http_error)}", ) + return data_sources + + @lru_cache(maxsize=None) + def _get_platform_and_dbname( + self, data_source_id: int + ) -> Union[Tuple[str, str], Tuple[None, None]]: + data_sources = self._get_data_sources() + if not data_sources: self.report.report_failure( key=f"mode-datasource-{data_source_id}", @@ -571,13 +708,13 @@ def _get_platform_and_dbname( def _replace_definitions(self, raw_query: str) -> str: query = raw_query - definitions = re.findall("({{[^}{]+}})", raw_query) + definitions = re.findall(r"({{(?:\s+)?@[^}{]+}})", raw_query) for definition_variable in definitions: definition_name, definition_alias = self._parse_definition_name( definition_variable ) definition_query = self._get_definition(definition_name) - # if unable to retrieve definition, then replace the {{}} so that it doesn't get picked up again in recurive call + # if unable to retrieve definition, then replace the {{}} so that it doesn't get picked up again in recursive call if definition_query is not None: query = query.replace( definition_variable, f"({definition_query}) as {definition_alias}" @@ -587,6 +724,8 @@ def _replace_definitions(self, raw_query: str) -> str: definition_variable, f"{definition_name} as {definition_alias}" ) query = self._replace_definitions(query) + query = query.replace("\\n", "\n") + query = query.replace("\\t", "\t") return query @@ -597,7 +736,7 @@ def _parse_definition_name(self, definition_variable: str) -> Tuple[str, str]: if len(name_match): name = name_match[0][1:] alias_match = re.findall( - r"as\s+\S+", definition_variable + r"as\s+\S+\w+", definition_variable ) # i.e ['as alias_name'] if len(alias_match): alias_match = alias_match[0].split(" ") @@ -685,24 +824,6 @@ def get_query_instance_urn_from_query(self, query_data: dict) -> str: data_source_id = query_data.get("data_source_id") return QueryUrn(f"{id}.{data_source_id}.{last_run_id}").urn() - def _get_upstream_warehouse_urn_for_query(self, query: dict) -> List[str]: - # create 
datasource urn - platform, db_name = self._get_platform_and_dbname(query.get("data_source_id")) - source_tables = self._get_source_from_query(query.get("raw_query")) - if not platform or not db_name or not source_tables: - return [] - datasource_urn = self._get_datasource_urn( - platform=platform, - platform_instance=( - self.config.platform_instance_map.get(platform) - if platform and self.config.platform_instance_map - else None - ), - database=db_name, - source_tables=list(source_tables), - ) - return datasource_urn - def set_field_tags(self, fields: List[SchemaFieldClass]) -> None: for field in fields: # It is not clear how to distinguish between measures and dimensions in Mode. @@ -717,17 +838,59 @@ def set_field_tags(self, fields: List[SchemaFieldClass]) -> None: tag = TagAssociationClass(tag=self.DIMENSION_TAG_URN) field.globalTags = GlobalTagsClass(tags=[tag]) + def normalize_mode_query(self, query: str) -> str: + regex = r"{% form %}(.*?){% endform %}" + rendered_query: str = query + normalized_query: str = query + + self.report.num_query_template_render += 1 + matches = re.findall(regex, query, re.MULTILINE | re.DOTALL | re.IGNORECASE) + try: + jinja_params: Dict = {} + if matches: + for match in matches: + definition = Template(source=match).render() + parameters = yaml.safe_load(definition) + for key in parameters.keys(): + jinja_params[key] = parameters[key].get("default", "") + + normalized_query = re.sub( + r"{% form %}(.*){% endform %}", + "", + query, + 0, + re.MULTILINE | re.DOTALL, + ) + + # Wherever we don't resolve the jinja params, we replace it with NULL + Undefined.__str__ = lambda self: "NULL" # type: ignore + rendered_query = Template(normalized_query).render(jinja_params) + self.report.num_query_template_render_success += 1 + except Exception as e: + logger.debug(f"Rendering query {query} failed with {e}") + self.report.num_query_template_render_failures += 1 + return rendered_query + + return rendered_query + def 
construct_query_from_api_data( self, report_token: str, query_data: dict, + space_token: str, + report_info: dict, ) -> Iterable[MetadataWorkUnit]: query_urn = self.get_dataset_urn_from_query(query_data) + query_token = query_data.get("token") dataset_props = DatasetPropertiesClass( name=query_data.get("name"), - description="", - externalUrl=f"{self.config.connect_uri}/{self.config.workspace}/reports/{report_token}/details/queries/{query_data.get('token')}", + description=f"""### Source Code +``` sql +{query_data.get("raw_query")} +``` + """, + externalUrl=f"{self.config.connect_uri}/{self.config.workspace}/reports/{report_token}/details/queries/{query_token}", customProperties=self.get_custom_props_from_dict( query_data, [ @@ -744,25 +907,26 @@ def construct_query_from_api_data( yield ( MetadataChangeProposalWrapper( - entityType="dataset", - changeType=ChangeTypeClass.UPSERT, entityUrn=query_urn, - aspectName="datasetProperties", aspect=dataset_props, ).as_workunit() ) - subtypes = SubTypesClass(typeNames=(["Query"])) + subtypes = SubTypesClass(typeNames=([BIAssetSubTypes.MODE_QUERY])) yield ( MetadataChangeProposalWrapper( - entityType="dataset", - changeType=ChangeTypeClass.UPSERT, entityUrn=query_urn, - aspectName="subTypes", aspect=subtypes, ).as_workunit() ) + yield MetadataChangeProposalWrapper( + entityUrn=query_urn, + aspect=BrowsePathsV2Class( + path=self._browse_path_query(space_token, report_info) + ), + ).as_workunit() + ( upstream_warehouse_platform, upstream_warehouse_db_name, @@ -772,8 +936,35 @@ def construct_query_from_api_data( # this means we can't infer the platform return + query = query_data["raw_query"] + query = self._replace_definitions(query) + normalized_query = self.normalize_mode_query(query) + query_to_parse = normalized_query + # If multiple query is present in the query, we get the last one. + # This won't work for complex cases where temp table is created and used in the same query. 
+ # But it should be good enough for simple use-cases. + try: + for partial_query in sqlglot.parse(normalized_query): + if not partial_query: + continue + # This is hacky but on snowflake we want to change the default warehouse if use warehouse is present + if upstream_warehouse_platform == "snowflake": + regexp = r"use\s+warehouse\s+(.*)(\s+)?;" + matches = re.search( + regexp, + partial_query.sql(dialect=upstream_warehouse_platform), + re.MULTILINE | re.DOTALL | re.IGNORECASE, + ) + if matches and matches.group(1): + upstream_warehouse_db_name = matches.group(1) + + query_to_parse = partial_query.sql(dialect=upstream_warehouse_platform) + except Exception as e: + logger.debug(f"sqlglot.parse failed on: {normalized_query}, error: {e}") + query_to_parse = normalized_query + parsed_query_object = create_lineage_sql_parsed_result( - query=query_data["raw_query"], + query=query_to_parse, default_db=upstream_warehouse_db_name, platform=upstream_warehouse_platform, platform_instance=( @@ -785,9 +976,24 @@ def construct_query_from_api_data( graph=self.ctx.graph, ) + self.report.num_sql_parsed += 1 + if parsed_query_object.debug_info.table_error: + self.report.num_sql_parser_table_error += 1 + self.report.num_sql_parser_failures += 1 + logger.info( + f"Failed to parse compiled code for report: {report_token} query: {query_token} {parsed_query_object.debug_info.error} the query was [{query_to_parse}]" + ) + elif parsed_query_object.debug_info.column_error: + self.report.num_sql_parser_column_error += 1 + self.report.num_sql_parser_failures += 1 + logger.info( + f"Failed to generate CLL for report: {report_token} query: {query_token}: {parsed_query_object.debug_info.column_error} the query was [{query_to_parse}]" + ) + else: + self.report.num_sql_parser_success += 1 + schema_fields = infer_output_schema(parsed_query_object) if schema_fields: - schema_metadata = SchemaMetadataClass( schemaName="mode_query", platform=f"urn:li:dataPlatform:{self.platform}", @@ -801,10 +1007,7
@@ def construct_query_from_api_data( yield ( MetadataChangeProposalWrapper( - entityType="dataset", - changeType=ChangeTypeClass.UPSERT, entityUrn=query_urn, - aspectName="schemaMetadata", aspect=schema_metadata, ).as_workunit() ) @@ -854,10 +1057,7 @@ def construct_query_from_api_data( ) yield MetadataChangeProposalWrapper( - entityType="query", - changeType=ChangeTypeClass.UPSERT, entityUrn=query_instance_urn, - aspectName="queryProperties", aspect=query_properties, ).as_workunit() @@ -927,49 +1127,49 @@ def get_upstream_lineage_for_parsed_sql( wu.append( MetadataChangeProposalWrapper( - entityType="dataset", - changeType=ChangeTypeClass.UPSERT, entityUrn=query_urn, - aspectName="upstreamLineage", aspect=upstream_lineage, ).as_workunit() ) return wu - def get_formula_columns(self, node: Dict, columns: Set[str] = set()) -> Set[str]: + def get_formula_columns( + self, node: Dict, columns: Optional[Set[str]] = None + ) -> Set[str]: + columns = columns if columns is not None else set() if isinstance(node, dict): for key, item in node.items(): - node = item if isinstance(item, dict): - self.get_formula_columns(node, columns) - elif isinstance(node, list): - for i in node: + self.get_formula_columns(item, columns) + elif isinstance(item, list): + for i in item: if isinstance(i, dict): self.get_formula_columns(i, columns) - elif isinstance(node, str): + elif isinstance(item, str): if key == "formula": - column_names = re.findall(r"\[(.+?)\]", node) + column_names = re.findall(r"\[(.+?)\]", item) columns.update(column_names) return columns def get_input_fields( - self, chart_urn: str, chart_data: Dict, chart_fields: List[str], query_urn: str + self, + chart_urn: str, + chart_data: Dict, + chart_fields: Dict[str, SchemaFieldClass], + query_urn: str, ) -> Iterable[MetadataWorkUnit]: + # TODO: Identify which fields are used as X, Y, filters, etc and tag them accordingly. 
fields = self.get_formula_columns(chart_data) input_fields = [] - for field in sorted(fields): + for field in fields: if field.lower() not in chart_fields: continue input_field = InputFieldClass( schemaFieldUrn=builder.make_schema_field_urn(query_urn, field.lower()), - schemaField=SchemaFieldClass( - fieldPath=field.lower(), - type=SchemaFieldDataTypeClass(type=StringTypeClass()), - nativeDataType="string", - ), + schemaField=chart_fields[field.lower()], ) input_fields.append(input_field) @@ -984,8 +1184,16 @@ def get_input_fields( ).as_workunit() def construct_chart_from_api_data( - self, chart_data: dict, chart_fields: List[str], query: dict, path: str + self, + index: int, + chart_data: dict, + chart_fields: Dict[str, SchemaFieldClass], + query: dict, + space_token: str, + report_info: dict, + query_name: str, ) -> Iterable[MetadataWorkUnit]: + # logger.debug(f"Processing chart {chart_data.get('token', '')}: {chart_data}") chart_urn = builder.make_chart_urn(self.platform, chart_data.get("token", "")) chart_snapshot = ChartSnapshot( urn=chart_urn, @@ -1009,6 +1217,9 @@ def construct_chart_from_api_data( lastModified=AuditStamp(time=modified_ts, actor=modified_actor), ) + # Last refreshed ts. 
+ last_refreshed_ts = self._parse_last_run_at(report_info) + chart_detail = ( chart_data.get("view", {}) if len(chart_data.get("view", {})) != 0 @@ -1024,7 +1235,12 @@ def construct_chart_from_api_data( or chart_detail.get("chartDescription") or "" ) - title = chart_detail.get("title") or chart_detail.get("chartTitle") or "" + + title = ( + chart_detail.get("title") + or chart_detail.get("chartTitle") + or f"Chart {index}" + ) # create datasource urn custom_properties = self.construct_chart_custom_properties( @@ -1032,15 +1248,16 @@ def construct_chart_from_api_data( ) query_urn = self.get_dataset_urn_from_query(query) - custom_properties["upstream_fields"] = "profile_id" + # Chart Info chart_info = ChartInfoClass( type=chart_type, description=description, title=title, lastModified=last_modified, - chartUrl=f"{self.config.connect_uri}" - f"{chart_data.get('_links', {}).get('report_viz_web', {}).get('href', '')}", + lastRefreshed=last_refreshed_ts, + # The links href starts with a slash already. 
+ chartUrl=f"{self.config.connect_uri}{chart_data.get('_links', {}).get('report_viz_web', {}).get('href', '')}", inputs=[query_urn], customProperties=custom_properties, inputEdges=[], @@ -1049,10 +1266,28 @@ def construct_chart_from_api_data( query_urn = self.get_dataset_urn_from_query(query) yield from self.get_input_fields(chart_urn, chart_data, chart_fields, query_urn) + + yield MetadataChangeProposalWrapper( + entityUrn=chart_urn, + aspect=SubTypesClass(typeNames=[BIAssetSubTypes.MODE_CHART]), + ).as_workunit() + # Browse Path + space_name = self.space_tokens[space_token] + report_name = report_info["name"] + path = f"/mode/{self.config.workspace}/{space_name}/{report_name}/{query_name}/{title}" browse_path = BrowsePathsClass(paths=[path]) chart_snapshot.aspects.append(browse_path) + # Browse path v2 + browse_path_v2 = BrowsePathsV2Class( + path=self._browse_path_chart(space_token, report_info, query), + ) + yield MetadataChangeProposalWrapper( + entityUrn=chart_urn, + aspect=browse_path_v2, + ).as_workunit() + # Query chart_query = ChartQueryClass( rawQuery=query.get("raw_query", ""), @@ -1073,7 +1308,7 @@ def construct_chart_from_api_data( yield MetadataWorkUnit(id=chart_snapshot.urn, mce=mce) @lru_cache(maxsize=None) - def _get_reports(self, space_token: str) -> list: + def _get_reports(self, space_token: str) -> List[dict]: reports = [] try: reports_json = self._get_request_json( @@ -1179,26 +1414,57 @@ def create_embed_aspect_mcp( aspect=EmbedClass(renderUrl=embed_url), ) + def gen_space_key(self, space_token: str) -> SpaceKey: + return SpaceKey(platform=self.platform, space_token=space_token) + + def construct_space_container( + self, space_token: str, space_name: str + ) -> Iterable[MetadataWorkUnit]: + key = self.gen_space_key(space_token) + yield from gen_containers( + container_key=key, + name=space_name, + sub_types=[BIContainerSubTypes.MODE_COLLECTION], + # TODO: Support extracting the documentation for a space. 
+ ) + + # We have a somewhat atypical browse path here, since we include the workspace name + # as what's effectively but not officially a platform instance. + yield MetadataChangeProposalWrapper( + entityUrn=key.as_urn(), + aspect=BrowsePathsV2Class(path=self._browse_path_space()), + ).as_workunit() + def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]: for space_token, space_name in self.space_tokens.items(): + yield from self.construct_space_container(space_token, space_name) + reports = self._get_reports(space_token) for report in reports: - dashboard_snapshot_from_report = self.construct_dashboard( - space_name, report + logger.debug( + f"Report: name: {report.get('name')} token: {report.get('token')}" ) + dashboard_tuple_from_report = self.construct_dashboard( + space_token=space_token, report_info=report + ) + + if dashboard_tuple_from_report is None: + continue + ( + dashboard_snapshot_from_report, + browse_mcpw, + ) = dashboard_tuple_from_report mce = MetadataChangeEvent( proposedSnapshot=dashboard_snapshot_from_report ) mcpw = MetadataChangeProposalWrapper( - entityType="dashboard", - changeType=ChangeTypeClass.UPSERT, entityUrn=dashboard_snapshot_from_report.urn, - aspectName="subTypes", - aspect=SubTypesClass(typeNames=["Report"]), + aspect=SubTypesClass(typeNames=[BIAssetSubTypes.MODE_REPORT]), ) yield mcpw.as_workunit() + yield browse_mcpw.as_workunit() usage_statistics = DashboardUsageStatisticsClass( timestampMillis=round(datetime.now().timestamp() * 1000), @@ -1206,10 +1472,7 @@ def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]: ) yield MetadataChangeProposalWrapper( - entityType="dashboard", - changeType=ChangeTypeClass.UPSERT, entityUrn=dashboard_snapshot_from_report.urn, - aspectName="dashboardUsageStatistics", aspect=usage_statistics, ).as_workunit() @@ -1223,34 +1486,52 @@ def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]: def emit_chart_mces(self) -> Iterable[MetadataWorkUnit]: # Space/collection -> report -> query 
-> Chart - for space_token, space_name in self.space_tokens.items(): + for space_token in self.space_tokens.keys(): reports = self._get_reports(space_token) for report in reports: report_token = report.get("token", "") + + if report.get("imported_datasets"): + # The connector doesn't support imported datasets yet. + # For now, we just keep this in the report to track what we're missing. + imported_datasets = [ + imported_dataset.get("name") or str(imported_dataset) + for imported_dataset in report["imported_datasets"] + ] + self.report.dropped_imported_datasets.setdefault( + report_token, LossyList() + ).extend(imported_datasets) + queries = self._get_queries(report_token) for query in queries: - query_mcps = self.construct_query_from_api_data(report_token, query) - chart_fields: List[str] = [] + query_mcps = self.construct_query_from_api_data( + report_token, + query, + space_token=space_token, + report_info=report, + ) + chart_fields: Dict[str, SchemaFieldClass] = {} for wu in query_mcps: - if ( - isinstance(wu.metadata, MetadataChangeProposalWrapper) - and wu.metadata.aspectName == "schemaMetadata" - ): - if isinstance(wu.metadata.aspect, SchemaMetadataClass): - schema_metadata = wu.metadata.aspect - for field in schema_metadata.fields: - chart_fields.append(field.fieldPath) + if isinstance( + wu.metadata, MetadataChangeProposalWrapper + ) and isinstance(wu.metadata.aspect, SchemaMetadataClass): + schema_metadata = wu.metadata.aspect + for field in schema_metadata.fields: + chart_fields.setdefault(field.fieldPath, field) yield wu charts = self._get_charts(report_token, query.get("token", "")) # build charts - for chart in charts: - view = chart.get("view") or chart.get("view_vegas") - chart_name = view.get("title") or view.get("chartTitle") or "" - path = f"/mode/{self.config.workspace}/{space_name}/{report.get('name')}/{query.get('name')}/{chart_name}" + for i, chart in enumerate(charts): yield from self.construct_chart_from_api_data( - chart, chart_fields, 
query, path + i, + chart, + chart_fields, + query, + space_token=space_token, + report_info=report, + query_name=query["name"], ) @classmethod diff --git a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py index cd78d1c030957..af6b44677dffa 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mongodb.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mongodb.py @@ -71,7 +71,7 @@ # See https://docs.mongodb.com/manual/reference/local-database/ and # https://docs.mongodb.com/manual/reference/config-database/ and # https://stackoverflow.com/a/48273736/5004662. -DENY_DATABASE_LIST = set(["admin", "config", "local"]) +DENY_DATABASE_LIST = {"admin", "config", "local"} class HostingEnvironment(Enum): diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py index d147ca3910d48..49998ffbce879 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/config.py @@ -391,10 +391,11 @@ class PowerBiDashboardSourceConfig( # Enable advance sql construct enable_advance_lineage_sql_construct: bool = pydantic.Field( - default=False, + default=True, description="Whether to enable advance native sql construct for parsing like join, sub-queries. " "along this flag , the native_query_parsing should be enabled. 
" - "By default convert_lineage_urns_to_lowercase is enabled, in-case if you have disabled it in previous ingestion execution then it may break lineage " + "By default convert_lineage_urns_to_lowercase is enabled, in-case if you have disabled it in previous " + "ingestion execution then it may break lineage " "as this option generates the upstream datasets URN in lowercase.", ) @@ -402,7 +403,8 @@ class PowerBiDashboardSourceConfig( extract_column_level_lineage: bool = pydantic.Field( default=False, description="Whether to extract column level lineage. " - "Works only if configs `native_query_parsing`, `enable_advance_lineage_sql_construct` & `extract_lineage` are enabled. " + "Works only if configs `native_query_parsing`, `enable_advance_lineage_sql_construct` & `extract_lineage` are " + "enabled. " "Works for M-Query where native SQL is used for transformation.", ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py index 52a7cb6fed0ee..27efad6dc21ca 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/native_sql_parser.py @@ -9,7 +9,7 @@ create_lineage_sql_parsed_result, ) -SPECIAL_CHARACTERS = ["#(lf)", "(lf)"] +SPECIAL_CHARACTERS = ["#(lf)", "(lf)", "#(tab)"] logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py index 72f9c2167cab9..49fbf926e49be 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py @@ -889,7 +889,7 @@ def get_datasource_server( return ( data_access_func_detail.identifier_accessor.items["Name"] if data_access_func_detail.identifier_accessor
is not None - else str() + else "" ) @@ -1027,6 +1027,7 @@ def create_lineage( self.current_data_platform = self.SUPPORTED_NATIVE_QUERY_DATA_PLATFORM[ data_access_tokens[0] ] + # First argument is the query sql_query: str = tree_function.strip_char_from_list( values=tree_function.remove_whitespaces_from_list( diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 607f314342375..16f174525254d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -580,7 +580,7 @@ def tile_custom_properties(tile: powerbi_data_classes.Tile) -> dict: ) # Browse path - browse_path = BrowsePathsClass(paths=["/powerbi/{}".format(workspace.name)]) + browse_path = BrowsePathsClass(paths=[f"/powerbi/{workspace.name}"]) browse_path_mcp = self.new_mcp( entity_type=Constant.CHART, entity_urn=chart_urn, @@ -990,7 +990,7 @@ def to_chart_mcps( ) # Browse path - browse_path = BrowsePathsClass(paths=["/powerbi/{}".format(workspace.name)]) + browse_path = BrowsePathsClass(paths=[f"/powerbi/{workspace.name}"]) browse_path_mcp = self.new_mcp( entity_type=Constant.CHART, entity_urn=chart_urn, @@ -1195,7 +1195,7 @@ class PowerBiDashboardSource(StatefulIngestionSourceBase, TestableSource): platform: str = "powerbi" def __init__(self, config: PowerBiDashboardSourceConfig, ctx: PipelineContext): - super(PowerBiDashboardSource, self).__init__(config, ctx) + super().__init__(config, ctx) self.source_config = config self.reporter = PowerBiDashboardSourceReport() self.dataplatform_instance_resolver = create_dataplatform_instance_resolver( diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py index 0d41ab00c66f5..ce4dd9a7a0c0f 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py @@ -268,7 +268,7 @@ def new_powerbi_dataset(workspace_id: str, raw_instance: dict) -> PowerBIDataset return PowerBIDataset( id=raw_instance["id"], name=raw_instance.get("name"), - description=raw_instance.get("description", str()), + description=raw_instance.get("description", ""), webUrl="{}/details".format(raw_instance.get("webUrl")) if raw_instance.get("webUrl") is not None else None, diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py index 3aeffa60bc28e..fadd7a48b62f7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py @@ -63,7 +63,7 @@ def __init__( self.__access_token_expiry_time: Optional[datetime] = None self.__tenant_id = tenant_id # Test connection by generating access token - logger.info("Trying to connect to {}".format(self._get_authority_url())) + logger.info(f"Trying to connect to {self._get_authority_url()}") # Power-Bi Auth (Service Principal Auth) self.__msal_client = msal.ConfidentialClientApplication( client_id, @@ -72,7 +72,7 @@ def __init__( ) self.get_access_token() - logger.info("Connected to {}".format(self._get_authority_url())) + logger.info(f"Connected to {self._get_authority_url()}") self._request_session = requests.Session() # set re-try parameter for request_session self._request_session.mount( @@ -124,7 +124,7 @@ def get_users(self, workspace_id: str, entity: str, entity_id: str) -> List[User pass def _get_authority_url(self): - return "{}{}".format(DataResolverBase.AUTHORITY, self.__tenant_id) + return f"{DataResolverBase.AUTHORITY}{self.__tenant_id}" def 
get_authorization_header(self): return {Constant.Authorization: self.get_access_token()} @@ -193,7 +193,7 @@ def get_dashboards(self, workspace: Workspace) -> List[Dashboard]: id=instance.get(Constant.ID), isReadOnly=instance.get(Constant.IS_READ_ONLY), displayName=instance.get(Constant.DISPLAY_NAME), - description=instance.get(Constant.DESCRIPTION, str()), + description=instance.get(Constant.DESCRIPTION, ""), embedUrl=instance.get(Constant.EMBED_URL), webUrl=instance.get(Constant.WEB_URL), workspace_id=workspace.id, @@ -276,7 +276,7 @@ def fetch_reports(): name=raw_instance.get(Constant.NAME), webUrl=raw_instance.get(Constant.WEB_URL), embedUrl=raw_instance.get(Constant.EMBED_URL), - description=raw_instance.get(Constant.DESCRIPTION, str()), + description=raw_instance.get(Constant.DESCRIPTION, ""), pages=self._get_pages_by_report( workspace=workspace, report_id=raw_instance[Constant.ID] ), @@ -809,7 +809,7 @@ def get_modified_workspaces(self, modified_since: str) -> List[str]: # Return scan_id of Scan created for the given workspace workspace_ids = [row["id"] for row in res.json()] - logger.debug("modified workspace_ids: {}".format(workspace_ids)) + logger.debug(f"modified workspace_ids: {workspace_ids}") return workspace_ids def get_dataset_parameters( diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py index b793929faa691..d6c7076d49507 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py @@ -143,7 +143,7 @@ class PowerBiReportServerAPI: def __init__(self, config: PowerBiReportServerAPIConfig) -> None: self.__config: PowerBiReportServerAPIConfig = config self.__auth: HttpNtlmAuth = HttpNtlmAuth( - "{}\\{}".format(self.__config.workstation_name, self.__config.username), + 
f"{self.__config.workstation_name}\\{self.__config.username}", self.__config.password, ) @@ -153,14 +153,14 @@ def get_auth_credentials(self): def requests_get(self, url_http: str, url_https: str, content_type: str) -> Any: try: - LOGGER.info("Request to Report URL={}".format(url_https)) + LOGGER.info(f"Request to Report URL={url_https}") response = requests.get( url=url_https, auth=self.get_auth_credentials, verify=True, ) except ConnectionError: - LOGGER.info("Request to Report URL={}".format(url_http)) + LOGGER.info(f"Request to Report URL={url_http}") response = requests.get( url=url_http, auth=self.get_auth_credentials, @@ -406,7 +406,7 @@ def to_datahub_user(self, user: CorpUser) -> List[MetadataChangeProposalWrapper] """ user_mcps = [] if user: - LOGGER.info("Converting user {} to datahub's user".format(user.username)) + LOGGER.info(f"Converting user {user.username} to datahub's user") # Create an URN for User user_urn = builder.make_user_urn(user.get_urn_part()) @@ -449,7 +449,7 @@ def to_datahub_user(self, user: CorpUser) -> List[MetadataChangeProposalWrapper] def to_datahub_work_units(self, report: Report) -> List[EquableMetadataWorkUnit]: mcps = [] user_mcps = [] - LOGGER.info("Converting Dashboard={} to DataHub Dashboard".format(report.name)) + LOGGER.info(f"Converting Dashboard={report.name} to DataHub Dashboard") # Convert user to CorpUser user_info = report.user_info.owner_to_add if user_info: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py index ee87d93774b3d..b65ae5cd2994c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py @@ -39,10 +39,10 @@ def validate_diplay_name(cls, value, values): # noqa: N805 return "" def get_urn_part(self): - return 
"reports.{}".format(self.id) + return f"reports.{self.id}" def get_web_url(self, base_reports_url: str) -> str: - return "{}powerbi{}".format(base_reports_url, self.path) + return f"{base_reports_url}powerbi{self.path}" def get_browse_path( self, base_folder: str, workspace: str, env: str, report_directory: str @@ -57,7 +57,7 @@ class DataSet(CatalogItem): query_execution_time_out: int = Field(alias="QueryExecutionTimeOut") def get_urn_part(self): - return "datasets.{}".format(self.id) + return f"datasets.{self.id}" def __members(self): return (self.id,) @@ -339,7 +339,7 @@ class CorpUser(BaseModel): global_tags: Optional[GlobalTags] = Field(None, alias="globalTags") def get_urn_part(self): - return "{}".format(self.username) + return f"{self.username}" def __members(self): return (self.username,) diff --git a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py index 00a49cd897d6f..7671e23928430 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/pulsar.py +++ b/metadata-ingestion/src/datahub/ingestion/source/pulsar.py @@ -53,7 +53,7 @@ logger = logging.getLogger(__name__) -class PulsarTopic(object): +class PulsarTopic: __slots__ = ["topic_parts", "fullname", "type", "tenant", "namespace", "topic"] def __init__(self, topic): @@ -65,7 +65,7 @@ def __init__(self, topic): self.topic = topic_parts[5] -class PulsarSchema(object): +class PulsarSchema: __slots__ = [ "schema_version", "schema_name", diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py index 66a18873d86df..d7a040ff5f0a0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_api.py @@ -36,7 +36,7 @@ def __init__(self, config: QlikSourceConfig) -> None: ) self.rest_api_url = f"https://{self.config.tenant_hostname}/api/v1" # Test 
connection by fetching list of api keys - logger.info("Trying to connect to {}".format(self.rest_api_url)) + logger.info(f"Trying to connect to {self.rest_api_url}") self.session.get(f"{self.rest_api_url}/api-keys").raise_for_status() def _log_http_error(self, message: str) -> Any: diff --git a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py index a5b9adae0376c..b9fd2a9c4fe22 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py +++ b/metadata-ingestion/src/datahub/ingestion/source/qlik_sense/qlik_sense.py @@ -112,7 +112,7 @@ class QlikSenseSource(StatefulIngestionSourceBase, TestableSource): platform: str = "qlik-sense" def __init__(self, config: QlikSourceConfig, ctx: PipelineContext): - super(QlikSenseSource, self).__init__(config, ctx) + super().__init__(config, ctx) self.config = config self.reporter = QlikSourceReport() try: diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py index 45fd1477df44e..2c7ebb613c57a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/lineage_v2.py @@ -34,6 +34,7 @@ KnownQueryLineageInfo, SqlParsingAggregator, ) +from datahub.utilities.perf_timer import PerfTimer logger = logging.getLogger(__name__) @@ -93,7 +94,7 @@ def build( db_schemas: Dict[str, Dict[str, RedshiftSchema]], ) -> None: # Assume things not in `all_tables` as temp tables. - self.known_urns = set( + self.known_urns = { DatasetUrn.create_from_ids( self.platform, f"{db}.{schema}.{table.name}", @@ -103,7 +104,7 @@ def build( for db, schemas in all_tables.items() for schema, tables in schemas.items() for table in tables - ) + } self.aggregator.is_temp_table = lambda urn: urn not in self.known_urns # Handle all the temp tables up front. 
@@ -226,13 +227,17 @@ def _populate_lineage_agg( try: logger.debug(f"Processing {lineage_type.name} lineage query: {query}") - for lineage_row in RedshiftDataDictionary.get_lineage_rows( - conn=connection, query=query - ): - processor(lineage_row) + timer = self.report.lineage_phases_timer.setdefault( + lineage_type.name, PerfTimer() + ) + with timer: + for lineage_row in RedshiftDataDictionary.get_lineage_rows( + conn=connection, query=query + ): + processor(lineage_row) except Exception as e: self.report.warning( - f"extract-{lineage_type.name}", + f"lineage-v2-extract-{lineage_type.name}", f"Error was {e}, {traceback.format_exc()}", ) self._lineage_v1.report_status(f"extract-{lineage_type.name}", False) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py index e2a035091d0ad..2e6cb8051c91e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/report.py @@ -8,6 +8,7 @@ from datahub.ingestion.source_report.time_window import BaseTimeWindowReport from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport from datahub.utilities.lossy_collections import LossyDict +from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.stats_collections import TopKDict @@ -55,6 +56,7 @@ class RedshiftReport( # lineage/usage v2 sql_aggregator: Optional[SqlAggregatorReport] = None + lineage_phases_timer: Dict[str, PerfTimer] = field(default_factory=dict) def report_dropped(self, key: str) -> None: self.filtered.append(key) diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py index 55e340e2850d5..921ab27564250 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/config.py @@ -111,9 +111,9 @@ def 
check_path_specs_and_infer_platform( raise ValueError("path_specs must not be empty") # Check that all path specs have the same platform. - guessed_platforms = set( + guessed_platforms = { "s3" if path_spec.is_s3 else "file" for path_spec in path_specs - ) + } if len(guessed_platforms) > 1: raise ValueError( f"Cannot have multiple platforms in path_specs: {guessed_platforms}" diff --git a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py index 2960411574430..946fdcedc571f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/salesforce.py +++ b/metadata-ingestion/src/datahub/ingestion/source/salesforce.py @@ -353,7 +353,7 @@ def get_custom_object_details(self, sObjectDeveloperName: str) -> dict: self.base_url + "tooling/query/?q=SELECT Description, Language, ManageableState, " + "CreatedDate, CreatedBy.Username, LastModifiedDate, LastModifiedBy.Username " - + "FROM CustomObject where DeveloperName='{0}'".format(sObjectDeveloperName) + + f"FROM CustomObject where DeveloperName='{sObjectDeveloperName}'" ) custom_objects_response = self.sf._call_salesforce("GET", query_url).json() if len(custom_objects_response["records"]) > 0: @@ -537,20 +537,22 @@ def get_profile_workunit( # Here field description is created from label, description and inlineHelpText def _get_field_description(self, field: dict, customField: dict) -> str: - desc = ( - "\\" + field["Label"] if field["Label"].startswith("#") else field["Label"] - ) + if "Label" not in field or field["Label"] is None: + desc = "" + elif field["Label"].startswith("#"): + desc = "\\" + field["Label"] + else: + desc = field["Label"] - for key in ["FieldDefinition", "InlineHelpText"]: - text: Optional[str] = "" - if isinstance(field.get(key), dict): - text = field[key].get("Description") - else: - text = field.get(key) + text = field.get("FieldDefinition", {}).get("Description", None) + if text: + prefix = "\\" if text.startswith("#") else 
"" + desc += f"\n\n{prefix}{text}" - if text: - prefix = "\\" if text.startswith("#") else "" - desc += f"\n\n{prefix}{text}" + text = field.get("InlineHelpText", None) + if text: + prefix = "\\" if text.startswith("#") else "" + desc += f"\n\n{prefix}{text}" return desc @@ -654,7 +656,7 @@ def get_schema_metadata_workunit( + "Precision, Scale, Length, Digits ,FieldDefinition.IsIndexed, IsUnique," + "IsCompound, IsComponent, ReferenceTo, FieldDefinition.ComplianceGroup," + "RelationshipName, IsNillable, FieldDefinition.Description, InlineHelpText " - + "FROM EntityParticle WHERE EntityDefinitionId='{0}'".format( + + "FROM EntityParticle WHERE EntityDefinitionId='{}'".format( sObject["DurableId"] ) ) @@ -663,16 +665,14 @@ def get_schema_metadata_workunit( "GET", sObject_fields_query_url ).json() - logger.debug( - "Received Salesforce {sObject} fields response".format(sObject=sObjectName) - ) + logger.debug(f"Received Salesforce {sObjectName} fields response") sObject_custom_fields_query_url = ( self.base_url + "tooling/query?q=SELECT " + "DeveloperName,CreatedDate,CreatedBy.Username,InlineHelpText," + "LastModifiedDate,LastModifiedBy.Username " - + "FROM CustomField WHERE EntityDefinitionId='{0}'".format( + + "FROM CustomField WHERE EntityDefinitionId='{}'".format( sObject["DurableId"] ) ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py b/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py index c7e8a15d8dfa4..635e894d18c7e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/schema/json_schema.py @@ -212,7 +212,7 @@ def _load_json_schema(filename, loader, use_id_as_base_uri): """Loads the given schema file""" path = Path(filename).resolve() base_path = dirname(str(path)) - base_uri = "file://{}/".format(base_path) + base_uri = f"file://{base_path}/" with open(path) as schema_file: logger.info(f"Opening file {path}") @@ -243,7 
+243,7 @@ def stringreplaceloader(match_string, replace_string, uri, **kwargs): return jsonref.jsonloader(uri, **kwargs) def __init__(self, ctx: PipelineContext, config: JsonSchemaSourceConfig): - super(JsonSchemaSource, self).__init__(ctx=ctx, config=config) + super().__init__(ctx=ctx, config=config) self.config = config self.report = StaleEntityRemovalSourceReport() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py index c335bee15931d..c2c28419ebcfd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sigma/sigma_api.py @@ -24,7 +24,7 @@ def __init__(self, config: SigmaSourceConfig) -> None: self.users: Dict[str, str] = {} self.session = requests.Session() # Test connection by generating access token - logger.info("Trying to connect to {}".format(self.config.api_url)) + logger.info(f"Trying to connect to {self.config.api_url}") self._generate_token() def _generate_token(self): diff --git a/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py index ef7301238e452..746f71fb0af37 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py +++ b/metadata-ingestion/src/datahub/ingestion/source/slack/slack.py @@ -97,6 +97,7 @@ def __init__(self, ctx: PipelineContext, config: SlackSourceConfig): self.rate_limiter = RateLimiter( max_calls=self.config.api_requests_per_min, period=60 ) + self._use_users_info = False @classmethod def create(cls, config_dict, ctx): @@ -239,19 +240,31 @@ def get_public_channels(self) -> Iterable[MetadataWorkUnit]: break def populate_user_profile(self, user_obj: CorpUser) -> None: + if not user_obj.slack_id: + return try: # https://api.slack.com/methods/users.profile.get with self.rate_limiter: - user_profile_res = self.get_slack_client().users_profile_get( - 
user=user_obj.slack_id - ) + if self._use_users_info: + user_profile_res = self.get_slack_client().users_info( + user=user_obj.slack_id + ) + user_profile_res = user_profile_res.get("user", {}) + else: + user_profile_res = self.get_slack_client().users_profile_get( + user=user_obj.slack_id + ) + logger.debug(f"User profile: {user_profile_res}") user_profile = user_profile_res.get("profile", {}) user_obj.title = user_profile.get("title") user_obj.image_url = user_profile.get("image_192") user_obj.phone = user_profile.get("phone") except Exception as e: if "missing_scope" in str(e): - raise e + if self._use_users_info: + raise e + self._use_users_info = True + self.populate_user_profile(user_obj) return def populate_slack_id_from_email(self, user_obj: CorpUser) -> None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py index 69a6b8e29c881..b12ef4d19c45c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_lineage_v2.py @@ -369,7 +369,9 @@ def _fetch_upstream_lineages_for_tables(self) -> Iterable[UpstreamLineageEdge]: ) try: for db_row in self.query(query): - yield UpstreamLineageEdge.parse_obj(db_row) + edge = self._process_upstream_lineage_row(db_row) + if edge: + yield edge except Exception as e: if isinstance(e, SnowflakePermissionError): error_msg = "Failed to get table/view to table lineage. Please grant imported privileges on SNOWFLAKE database. 
" @@ -382,6 +384,19 @@ def _fetch_upstream_lineages_for_tables(self) -> Iterable[UpstreamLineageEdge]: ) self.report_status(TABLE_LINEAGE, False) + def _process_upstream_lineage_row( + self, db_row: dict + ) -> Optional[UpstreamLineageEdge]: + try: + return UpstreamLineageEdge.parse_obj(db_row) + except Exception as e: + self.report.num_upstream_lineage_edge_parsing_failed += 1 + self.report_warning( + f"Parsing lineage edge failed due to error {e}", + db_row.get("DOWNSTREAM_TABLE_NAME") or "", + ) + return None + def map_query_result_upstreams( self, upstream_tables: Optional[List[UpstreamTableNode]], query_id: str ) -> List[UrnStr]: diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py index 9a37f779bbcd5..5e6ade29344eb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_profiler.py @@ -102,8 +102,26 @@ def get_batch_kwargs( # We are using fraction-based sampling here, instead of fixed-size sampling because # Fixed-size sampling can be slower than equivalent fraction-based sampling # as per https://docs.snowflake.com/en/sql-reference/constructs/sample#performance-considerations - sample_pc = 100 * self.config.profiling.sample_size / table.rows_count - custom_sql = f'select * from "{db_name}"."{schema_name}"."{table.name}" TABLESAMPLE ({sample_pc:.8f})' + estimated_block_row_count = 500_000 + block_profiling_min_rows = 100 * estimated_block_row_count + + tablename = f'"{db_name}"."{schema_name}"."{table.name}"' + sample_pc = self.config.profiling.sample_size / table.rows_count + + overgeneration_factor = 1000 + if ( + table.rows_count > block_profiling_min_rows + and table.rows_count + > self.config.profiling.sample_size * overgeneration_factor + ): + # If the table is significantly larger than the sample size, do a first pass + # 
using block sampling to improve performance. We generate a table 1000 times + # larger than the target sample size, and then use normal sampling for the + # final size reduction. + tablename = f"(SELECT * FROM {tablename} TABLESAMPLE BLOCK ({100 * overgeneration_factor * sample_pc:.8f}))" + sample_pc = 1 / overgeneration_factor + + custom_sql = f"select * from {tablename} TABLESAMPLE BERNOULLI ({100 * sample_pc:.8f})" return { **super().get_batch_kwargs(table, schema_name, db_name), # Lowercase/Mixedcase table names in Snowflake do not work by default. diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py index 5c04af3290ab1..1849af1db50cc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_query.py @@ -558,6 +558,7 @@ def usage_per_object_per_time_bucket_for_time_window( include_top_n_queries: bool, email_domain: Optional[str], email_filter: AllowDenyPattern, + table_deny_pattern: List[str] = DEFAULT_TABLES_DENY_LIST, ) -> str: if not include_top_n_queries: top_n_queries = 0 @@ -565,6 +566,12 @@ def usage_per_object_per_time_bucket_for_time_window( time_bucket_size == BucketDuration.DAY or time_bucket_size == BucketDuration.HOUR ) + + temp_table_filter = create_deny_regex_sql_filter( + table_deny_pattern, + ["object_name"], + ) + objects_column = ( "BASE_OBJECTS_ACCESSED" if use_base_objects else "DIRECT_OBJECTS_ACCESSED" ) @@ -604,6 +611,7 @@ def usage_per_object_per_time_bucket_for_time_window( ) t, lateral flatten(input => t.{objects_column}) object + {("where " + temp_table_filter) if temp_table_filter else ""} ) , field_access_history AS diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py index 
40112eed5a463..d79ed384d755b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_report.py @@ -2,6 +2,7 @@ from datetime import datetime from typing import Dict, List, MutableSet, Optional +from datahub.ingestion.api.report import Report from datahub.ingestion.glossary.classification_mixin import ClassificationReportMixin from datahub.ingestion.source.snowflake.constants import SnowflakeEdition from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport @@ -11,6 +12,19 @@ from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport +from datahub.utilities.perf_timer import PerfTimer + + +@dataclass +class SnowflakeUsageAggregationReport(Report): + query_secs: float = -1 + query_row_count: int = -1 + result_fetch_timer: PerfTimer = field(default_factory=PerfTimer) + result_skip_timer: PerfTimer = field(default_factory=PerfTimer) + result_map_timer: PerfTimer = field(default_factory=PerfTimer) + users_map_timer: PerfTimer = field(default_factory=PerfTimer) + queries_map_timer: PerfTimer = field(default_factory=PerfTimer) + fields_map_timer: PerfTimer = field(default_factory=PerfTimer) @dataclass @@ -31,6 +45,10 @@ class SnowflakeUsageReport: usage_end_time: Optional[datetime] = None stateful_usage_ingestion_enabled: bool = False + usage_aggregation: SnowflakeUsageAggregationReport = field( + default_factory=SnowflakeUsageAggregationReport + ) + @dataclass class SnowflakeReport(ProfilingSqlReport, BaseTimeWindowReport): @@ -83,12 +101,10 @@ class SnowflakeV2Report( include_technical_schema: bool = False include_column_lineage: bool = False - usage_aggregation_query_secs: float = -1 table_lineage_query_secs: float = -1 - # view_lineage_parse_secs: float = -1 - # 
view_upstream_lineage_query_secs: float = -1 - # view_downstream_lineage_query_secs: float = -1 external_lineage_queries_secs: float = -1 + num_tables_with_known_upstreams: int = 0 + num_upstream_lineage_edge_parsing_failed: int = 0 # Reports how many times we reset in-memory `functools.lru_cache` caches of data, # which occurs when we occur a different database / schema. @@ -115,14 +131,6 @@ class SnowflakeV2Report( edition: Optional[SnowflakeEdition] = None - # num_tables_with_external_upstreams_only: int = 0 - num_tables_with_known_upstreams: int = 0 - # num_views_with_upstreams: int = 0 - - # num_view_definitions_parsed: int = 0 - # num_view_definitions_failed_parsing: int = 0 - # num_view_definitions_failed_column_parsing: int = 0 - def report_entity_scanned(self, name: str, ent_type: str = "table") -> None: """ Entity could be a view or a table or a schema or a database diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py index f75e994303954..e8b56a01944ad 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_usage_v2.py @@ -36,8 +36,9 @@ ) from datahub.metadata.com.linkedin.pegasus2avro.timeseries import TimeWindowSize from datahub.metadata.schema_classes import OperationClass, OperationTypeClass +from datahub.sql_parsing.sqlglot_utils import try_format_query from datahub.utilities.perf_timer import PerfTimer -from datahub.utilities.sql_formatter import format_sql_query, trim_query +from datahub.utilities.sql_formatter import trim_query logger: logging.Logger = logging.getLogger(__name__) @@ -216,6 +217,7 @@ def _get_workunits_internal( include_top_n_queries=self.config.include_top_n_queries, email_domain=self.config.email_domain, email_filter=self.config.user_email_pattern, + 
table_deny_pattern=self.config.temporary_tables_pattern, ), ) except Exception as e: @@ -227,29 +229,46 @@ def _get_workunits_internal( self.report_status(USAGE_EXTRACTION_USAGE_AGGREGATION, False) return - self.report.usage_aggregation_query_secs = timer.elapsed_seconds() + self.report.usage_aggregation.query_secs = timer.elapsed_seconds() + self.report.usage_aggregation.query_row_count = results.rowcount - for row in results: - if not self._is_dataset_pattern_allowed( - row["OBJECT_NAME"], - row["OBJECT_DOMAIN"], - ): - continue - - dataset_identifier = self.get_dataset_identifier_from_qualified_name( - row["OBJECT_NAME"] - ) - if dataset_identifier not in discovered_datasets: - logger.debug( - f"Skipping usage for table {dataset_identifier}, as table schema is not accessible or not allowed by recipe." - ) - continue + with self.report.usage_aggregation.result_fetch_timer as fetch_timer: + for row in results: + with fetch_timer.pause(), self.report.usage_aggregation.result_skip_timer as skip_timer: + if results.rownumber is not None and results.rownumber % 1000 == 0: + logger.debug(f"Processing usage row number {results.rownumber}") + logger.debug(self.report.usage_aggregation.as_string()) - yield from self.build_usage_statistics_for_dataset(dataset_identifier, row) + if not self._is_dataset_pattern_allowed( + row["OBJECT_NAME"], + row["OBJECT_DOMAIN"], + ): + logger.debug( + f"Skipping usage for {row['OBJECT_DOMAIN']} {row['OBJECT_NAME']}, as table is not allowed by recipe." + ) + continue + + dataset_identifier = ( + self.get_dataset_identifier_from_qualified_name( + row["OBJECT_NAME"] + ) + ) + if dataset_identifier not in discovered_datasets: + logger.debug( + f"Skipping usage for {row['OBJECT_DOMAIN']} {dataset_identifier}, as table is not accessible." 
+ ) + continue + with skip_timer.pause(), self.report.usage_aggregation.result_map_timer as map_timer: + wu = self.build_usage_statistics_for_dataset( + dataset_identifier, row + ) + if wu: + with map_timer.pause(): + yield wu def build_usage_statistics_for_dataset( self, dataset_identifier: str, row: dict - ) -> Iterable[MetadataWorkUnit]: + ) -> Optional[MetadataWorkUnit]: try: stats = DatasetUsageStatistics( timestampMillis=int(row["BUCKET_START_TIME"].timestamp() * 1000), @@ -258,18 +277,15 @@ def build_usage_statistics_for_dataset( ), totalSqlQueries=row["TOTAL_QUERIES"], uniqueUserCount=row["TOTAL_USERS"], - topSqlQueries=self._map_top_sql_queries( - json.loads(row["TOP_SQL_QUERIES"]) - ) - if self.config.include_top_n_queries - else None, - userCounts=self._map_user_counts( - json.loads(row["USER_COUNTS"]), + topSqlQueries=( + self._map_top_sql_queries(row["TOP_SQL_QUERIES"]) + if self.config.include_top_n_queries + else None ), - fieldCounts=self._map_field_counts(json.loads(row["FIELD_COUNTS"])), + userCounts=self._map_user_counts(row["USER_COUNTS"]), + fieldCounts=self._map_field_counts(row["FIELD_COUNTS"]), ) - - yield MetadataChangeProposalWrapper( + return MetadataChangeProposalWrapper( entityUrn=self.dataset_urn_builder(dataset_identifier), aspect=stats ).as_workunit() except Exception as e: @@ -281,61 +297,79 @@ def build_usage_statistics_for_dataset( "Failed to parse usage statistics for dataset", dataset_identifier ) - def _map_top_sql_queries(self, top_sql_queries: Dict) -> List[str]: - budget_per_query: int = int( - self.config.queries_character_limit / self.config.top_n_queries - ) - return sorted( - [ - trim_query(format_sql_query(query), budget_per_query) - if self.config.format_sql_queries - else trim_query(query, budget_per_query) - for query in top_sql_queries - ] - ) + return None + + def _map_top_sql_queries(self, top_sql_queries_str: str) -> List[str]: + with self.report.usage_aggregation.queries_map_timer: + top_sql_queries = 
json.loads(top_sql_queries_str) + budget_per_query: int = int( + self.config.queries_character_limit / self.config.top_n_queries + ) + return sorted( + [ + ( + trim_query( + try_format_query(query, self.platform), budget_per_query + ) + if self.config.format_sql_queries + else trim_query(query, budget_per_query) + ) + for query in top_sql_queries + ] + ) def _map_user_counts( self, - user_counts: Dict, + user_counts_str: str, ) -> List[DatasetUserUsageCounts]: - filtered_user_counts = [] - for user_count in user_counts: - user_email = user_count.get("email") - if not user_email and self.config.email_domain and user_count["user_name"]: - user_email = "{0}@{1}".format( - user_count["user_name"], self.config.email_domain - ).lower() - if not user_email or not self.config.user_email_pattern.allowed(user_email): - continue - - filtered_user_counts.append( - DatasetUserUsageCounts( - user=make_user_urn( - self.get_user_identifier( - user_count["user_name"], - user_email, - self.config.email_as_user_identifier, - ) - ), - count=user_count["total"], - # NOTE: Generated emails may be incorrect, as email may be different than - # username@email_domain - userEmail=user_email, + with self.report.usage_aggregation.users_map_timer: + user_counts = json.loads(user_counts_str) + filtered_user_counts = [] + for user_count in user_counts: + user_email = user_count.get("email") + if ( + not user_email + and self.config.email_domain + and user_count["user_name"] + ): + user_email = "{}@{}".format( + user_count["user_name"], self.config.email_domain + ).lower() + if not user_email or not self.config.user_email_pattern.allowed( + user_email + ): + continue + + filtered_user_counts.append( + DatasetUserUsageCounts( + user=make_user_urn( + self.get_user_identifier( + user_count["user_name"], + user_email, + self.config.email_as_user_identifier, + ) + ), + count=user_count["total"], + # NOTE: Generated emails may be incorrect, as email may be different than + # username@email_domain + 
userEmail=user_email, + ) ) + return sorted(filtered_user_counts, key=lambda v: v.user) + + def _map_field_counts(self, field_counts_str: str) -> List[DatasetFieldUsageCounts]: + with self.report.usage_aggregation.fields_map_timer: + field_counts = json.loads(field_counts_str) + return sorted( + [ + DatasetFieldUsageCounts( + fieldPath=self.snowflake_identifier(field_count["col"]), + count=field_count["total"], + ) + for field_count in field_counts + ], + key=lambda v: v.fieldPath, ) - return sorted(filtered_user_counts, key=lambda v: v.user) - - def _map_field_counts(self, field_counts: Dict) -> List[DatasetFieldUsageCounts]: - return sorted( - [ - DatasetFieldUsageCounts( - fieldPath=self.snowflake_identifier(field_count["col"]), - count=field_count["total"], - ) - for field_count in field_counts - ], - key=lambda v: v.fieldPath, - ) def _get_snowflake_history(self) -> Iterable[SnowflakeJoinedAccessEvent]: logger.info("Getting access history") @@ -438,9 +472,11 @@ def _get_operation_aspect_work_unit( lastUpdatedTimestamp=last_updated_timestamp, actor=user_urn, operationType=operation_type, - customOperationType=query_type - if operation_type is OperationTypeClass.CUSTOM - else None, + customOperationType=( + query_type + if operation_type is OperationTypeClass.CUSTOM + else None + ), ) mcp = MetadataChangeProposalWrapper( entityUrn=self.dataset_urn_builder(dataset_identifier), diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py index af8d8824a4b17..5708b9f168c51 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_utils.py @@ -37,7 +37,7 @@ def get_connection(self) -> SnowflakeConnection: class SnowflakeQueryMixin: def query(self: SnowflakeQueryProtocol, query: str) -> Any: try: - self.logger.debug("Query : {}".format(query)) + 
self.logger.debug(f"Query : {query}") resp = self.get_connection().cursor(DictCursor).execute(query) return resp diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 9344e030d749f..25626d434f2ef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -366,7 +366,7 @@ class SnowflakePrivilege: object_type: str def query(query): - logger.info("Query : {}".format(query)) + logger.info(f"Query : {query}") resp = conn.cursor().execute(query) return resp diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py index 84c1d3844a7b4..b2c40f914bddc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/clickhouse.py @@ -286,7 +286,7 @@ def get_view_names(self, connection, schema=None, **kw): # when reflecting schema for multiple tables at once. 
@reflection.cache # type: ignore def _get_schema_column_info(self, connection, schema=None, **kw): - schema_clause = "database = '{schema}'".format(schema=schema) if schema else "1" + schema_clause = f"database = '{schema}'" if schema else "1" all_columns = defaultdict(list) result = connection.execute( text( @@ -346,7 +346,7 @@ def _get_column_info(self, name, format_type, comment): @reflection.cache # type: ignore def get_columns(self, connection, table_name, schema=None, **kw): if not schema: - query = "DESCRIBE TABLE {}".format(self._quote_table_name(table_name)) + query = f"DESCRIBE TABLE {self._quote_table_name(table_name)}" cols = self._execute(connection, query) else: cols = self._get_clickhouse_columns(connection, table_name, schema, **kw) diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py index 003732236ba80..95ce534968df5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive.py @@ -74,7 +74,9 @@ def dbapi_get_columns_patched(self, connection, table_name, schema=None, **kw): coltype = _type_map[col_type] except KeyError: util.warn( - "Did not recognize type '%s' of column '%s'" % (col_type, col_name) + "Did not recognize type '{}' of column '{}'".format( + col_type, col_name + ) ) coltype = types.NullType # type: ignore result.append( @@ -112,7 +114,7 @@ def get_view_definition_patched(self, connection, view_name, schema=None, **kw): self.identifier_preparer.quote_identifier(schema), self.identifier_preparer.quote_identifier(view_name), ) - row = connection.execute("SHOW CREATE TABLE {}".format(full_table)).fetchone() + row = connection.execute(f"SHOW CREATE TABLE {full_table}").fetchone() return row[0] diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py index db702df9ddc92..944b8a080cb57 
100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py @@ -130,8 +130,8 @@ class HiveMetastore(BasicSQLAlchemyConfig): ) enable_properties_merge: bool = Field( - default=False, - description="By default, the connector overwrites properties every time. Set this to True to enable merging of properties with what exists on the server.", + default=True, + description="By default, the connector enables merging of properties with what exists on the server. Set this to False to enable the default connector behavior of overwriting properties on each ingestion.", ) simplify_nested_field_paths: bool = Field( diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py index 0a67d6228e6db..dcc1340c81d7b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/oracle.py @@ -226,13 +226,13 @@ def get_columns( col.default_on_null, ( SELECT id.generation_type || ',' || id.IDENTITY_OPTIONS - FROM DBA_TAB_IDENTITY_COLS%(dblink)s id + FROM DBA_TAB_IDENTITY_COLS{dblink} id WHERE col.table_name = id.table_name AND col.column_name = id.column_name AND col.owner = id.owner - ) AS identity_options""" % { - "dblink": dblink - } + ) AS identity_options""".format( + dblink=dblink + ) else: identity_cols = "NULL as default_on_null, NULL as identity_options" diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index 59819db8b2dc9..3091791551827 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -326,7 +326,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource): """A Base class for all SQL Sources that use SQLAlchemy to extend""" 
def __init__(self, config: SQLCommonConfig, ctx: PipelineContext, platform: str): - super(SQLAlchemySource, self).__init__(config, ctx) + super().__init__(config, ctx) self.config = config self.platform = platform self.report: SQLSourceReport = SQLSourceReport() diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py index dedb6eedd5ee4..8ea4209784063 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_types.py @@ -79,6 +79,7 @@ "regtype": None, "regrole": None, "regnamespace": None, + "super": None, "uuid": StringType, "pg_lsn": None, "tsvector": None, # text search vector diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py index 16655d1748287..f45147223b888 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py @@ -210,8 +210,7 @@ def gen_lineage( ).as_workunit() ] - for wu in lineage_workunits: - yield wu + yield from lineage_workunits # downgrade a schema field diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py index e1c47acbc4b87..c79af14780874 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/trino.py @@ -86,7 +86,7 @@ register_custom_type(datatype.JSON, RecordTypeClass) -@functools.lru_cache() +@functools.lru_cache def gen_catalog_connector_dict(engine: Engine) -> Dict[str, str]: query = dedent( """ @@ -473,7 +473,7 @@ def _parse_struct_fields(parts): "type": "record", "name": "__struct_{}".format(str(uuid.uuid4()).replace("-", "")), "fields": fields, - "native_data_type": "ROW({})".format(parts), + "native_data_type": f"ROW({parts})", 
} diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py index 738cc7e321764..7534f1295c528 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/vertica.py @@ -123,7 +123,7 @@ def clean_host_port(cls, v): class VerticaSource(SQLAlchemySource): def __init__(self, config: VerticaConfig, ctx: PipelineContext): # self.platform = platform - super(VerticaSource, self).__init__(config, ctx, "vertica") + super().__init__(config, ctx, "vertica") self.report: SQLSourceReport = VerticaSourceReport() self.config: VerticaConfig = config diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py index b80067aa0892c..0145c922696e8 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py @@ -164,6 +164,9 @@ def set_job_id(self, unique_id): def is_checkpointing_enabled(self) -> bool: return self.checkpointing_enabled + def _get_state_obj(self): + return self.state_type_class() + def create_checkpoint(self) -> Optional[Checkpoint]: if self.is_checkpointing_enabled() and not self._ignore_new_state(): assert self.stateful_ingestion_config is not None @@ -172,7 +175,7 @@ def create_checkpoint(self) -> Optional[Checkpoint]: job_name=self.job_id, pipeline_name=self.pipeline_name, run_id=self.run_id, - state=self.state_type_class(), + state=self._get_state_obj(), ) return None @@ -255,9 +258,13 @@ def gen_removed_entity_workunits(self) -> Iterable[MetadataWorkUnit]: # If the source already had a failure, skip soft-deletion. # TODO: Eventually, switch this to check if anything in the pipeline had a failure so far. 
if self.source.get_report().failures: + for urn in last_checkpoint_state.get_urns_not_in( + type="*", other_checkpoint_state=cur_checkpoint_state + ): + self.add_entity_to_state("", urn) self.source.get_report().report_warning( "stale-entity-removal", - "Skipping stale entity soft-deletion since source already had failures.", + "Skipping stale entity soft-deletion and coping urns from last state since source already had failures.", ) return diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 23a75745698d9..1d44fb6122a36 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -15,12 +15,13 @@ Union, cast, ) +from urllib.parse import urlparse import dateutil.parser as dp import tableauserverclient as TSC from pydantic import root_validator, validator from pydantic.fields import Field -from requests.adapters import ConnectionError, HTTPAdapter +from requests.adapters import HTTPAdapter from tableauserverclient import ( PersonalAccessTokenAuth, Server, @@ -86,6 +87,7 @@ clean_query, custom_sql_graphql_query, dashboard_graphql_query, + database_servers_graphql_query, database_tables_graphql_query, embedded_datasource_graphql_query, get_filter_pages, @@ -345,6 +347,11 @@ class TableauConfig( description="Mappings to change generated dataset urns. Use only if you really know what you are doing.", ) + database_hostname_to_platform_instance_map: Optional[Dict[str, str]] = Field( + default=None, + description="Mappings to change platform instance in generated dataset urns based on database. 
Use only if you really know what you are doing.", + ) + extract_usage_stats: bool = Field( default=False, description="[experimental] Extract usage statistics for dashboards and charts.", @@ -537,6 +544,8 @@ def __init__( self.workbook_project_map: Dict[str, str] = {} self.datasource_project_map: Dict[str, str] = {} + # This map keeps track of the database server connection hostnames. + self.database_server_hostname_map: Dict[str, str] = {} # This list keeps track of sheets in workbooks so that we retrieve those # when emitting sheets. self.sheet_ids: List[str] = [] @@ -572,11 +581,11 @@ def close(self) -> None: try: if self.server is not None: self.server.auth.sign_out() - except ConnectionError as err: + except Exception as ex: logger.warning( "During graceful closing of Tableau source a sign-out call was tried but ended up with" - " a ConnectionError (%s). Continuing closing of the source", - err, + " an Exception (%s). Continuing closing of the source", + ex, ) self.server = None super().close() @@ -609,6 +618,24 @@ def _populate_usage_stat_registry(self) -> None: self.tableau_stat_registry[view.id] = UsageStat(view_count=view.total_views) logger.debug("Tableau stats %s", self.tableau_stat_registry) + def _populate_database_server_hostname_map(self) -> None: + def maybe_parse_hostname(): + # If the connection string is a URL instead of a hostname, parse it + # and extract the hostname, otherwise just return the connection string. 
+ parsed_host_name = urlparse(server_connection).hostname + if parsed_host_name: + return parsed_host_name + return server_connection + + for database_server in self.get_connection_objects( + database_servers_graphql_query, c.DATABASE_SERVERS_CONNECTION + ): + database_server_id = database_server.get(c.ID) + server_connection = database_server.get(c.HOST_NAME) + host_name = maybe_parse_hostname() + if host_name: + self.database_server_hostname_map[str(database_server_id)] = host_name + def _get_all_project(self) -> Dict[str, TableauProject]: all_project_map: Dict[str, TableauProject] = {} @@ -864,7 +891,7 @@ def get_connection_objects( self, query: str, connection_type: str, - query_filter: dict, + query_filter: dict = {}, page_size_override: Optional[int] = None, ) -> Iterable[dict]: # Calls the get_connection_object_page function to get the objects, @@ -897,8 +924,7 @@ def get_connection_objects( offset += count - for obj in connection_objects.get(c.NODES) or []: - yield obj + yield from connection_objects.get(c.NODES) or [] def emit_workbooks(self) -> Iterable[MetadataWorkUnit]: if self.tableau_project_registry: @@ -1004,6 +1030,30 @@ def _create_upstream_table_lineage( env=self.config.env, ) + if not upstream_tables: + # Tableau's metadata graphql API sometimes returns an empty list for upstreamTables + # for embedded datasources. However, the upstreamColumns field often includes information. + # This attempts to populate upstream table information from the upstreamColumns field. 
+ table_id_to_urn = { + column[c.TABLE][c.ID]: builder.make_dataset_urn_with_platform_instance( + self.platform, + column[c.TABLE][c.ID], + self.config.platform_instance, + self.config.env, + ) + for field in datasource.get(c.FIELDS, []) + for column in field.get(c.UPSTREAM_COLUMNS, []) + if column.get(c.TABLE, {}).get(c.TYPE_NAME) == c.CUSTOM_SQL_TABLE + and column.get(c.TABLE, {}).get(c.ID) + } + fine_grained_lineages = self.get_upstream_columns_of_fields_in_datasource( + datasource, datasource_urn, table_id_to_urn + ) + upstream_tables = [ + Upstream(dataset=table_urn, type=DatasetLineageType.TRANSFORMED) + for table_urn in table_id_to_urn.values() + ] + if datasource.get(c.FIELDS): if self.config.extract_column_level_lineage: # Find fine grained lineage for datasource column to datasource column edge, @@ -1118,6 +1168,8 @@ def get_upstream_tables( self.config.env, self.config.platform_instance_map, self.config.lineage_overrides, + self.config.database_hostname_to_platform_instance_map, + self.database_server_hostname_map, ) table_id_to_urn[table[c.ID]] = table_urn @@ -1684,8 +1736,11 @@ def parse_custom_sql( [ str, Optional[str], + Optional[str], Optional[Dict[str, str]], Optional[TableauLineageOverrides], + Optional[Dict[str, str]], + Optional[Dict[str, str]], ], Tuple[Optional[str], Optional[str], str, str], ] @@ -1693,7 +1748,7 @@ def parse_custom_sql( ) -> Optional["SqlParsingResult"]: database_info = datasource.get(c.DATABASE) or { c.NAME: c.UNKNOWN.lower(), - c.CONNECTION_TYPE: "databricks", + c.CONNECTION_TYPE: datasource.get(c.CONNECTION_TYPE), } if ( @@ -1703,7 +1758,10 @@ def parse_custom_sql( logger.debug(f"datasource {datasource_urn} is not created from custom sql") return None - if c.NAME not in database_info or c.CONNECTION_TYPE not in database_info: + if ( + database_info.get(c.NAME) is None + or database_info.get(c.CONNECTION_TYPE) is None + ): logger.debug( f"database information is missing from datasource {datasource_urn}" ) @@ -1726,8 
+1784,11 @@ def parse_custom_sql( upstream_db, platform_instance, platform, _ = func_overridden_info( database_info[c.CONNECTION_TYPE], database_info.get(c.NAME), + database_info.get(c.ID), self.config.platform_instance_map, self.config.lineage_overrides, + self.config.database_hostname_to_platform_instance_map, + self.database_server_hostname_map, ) logger.debug( @@ -2732,6 +2793,11 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: if self.config.extract_usage_stats: self._populate_usage_stat_registry() + # Populate the map of database names and database hostnames to be used later to map + # databases to platform instances. + if self.config.database_hostname_to_platform_instance_map: + self._populate_database_server_hostname_map() + self._populate_projects_registry() yield from self.emit_project_containers() yield from self.emit_workbooks() diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py index 98536472c5f61..6c75876e68787 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_common.py @@ -206,6 +206,7 @@ class MetadataQueryException(Exception): name database { name + id } schema fullName @@ -290,6 +291,7 @@ class MetadataQueryException(Exception): name database { name + id } schema fullName @@ -315,6 +317,7 @@ class MetadataQueryException(Exception): name database { name + id } schema fullName @@ -324,8 +327,10 @@ class MetadataQueryException(Exception): totalCount } } + connectionType database{ name + id connectionType } } @@ -346,6 +351,7 @@ class MetadataQueryException(Exception): name database { name + id } schema fullName @@ -417,6 +423,16 @@ class MetadataQueryException(Exception): } """ +database_servers_graphql_query = """ +{ + name + id + connectionType + extendedConnectionType + hostName +} +""" + # 
https://referencesource.microsoft.com/#system.data/System/Data/OleDb/OLEDB_Enum.cs,364 FIELD_TYPE_MAPPING = { "INTEGER": NumberTypeClass, @@ -591,6 +607,7 @@ def get_fully_qualified_table_name( @dataclass class TableauUpstreamReference: database: Optional[str] + database_id: Optional[str] schema: Optional[str] table: str @@ -602,6 +619,7 @@ def create( ) -> "TableauUpstreamReference": # Values directly from `table` object from Tableau database = t_database = d.get(c.DATABASE, {}).get(c.NAME) + database_id = d.get(c.DATABASE, {}).get(c.ID) schema = t_schema = d.get(c.SCHEMA) table = t_table = d.get(c.NAME) or "" t_full_name = d.get(c.FULL_NAME) @@ -653,6 +671,7 @@ def create( return cls( database=database, + database_id=database_id, schema=schema, table=table, connection_type=t_connection_type, @@ -678,6 +697,8 @@ def make_dataset_urn( env: str, platform_instance_map: Optional[Dict[str, str]], lineage_overrides: Optional[TableauLineageOverrides] = None, + database_hostname_to_platform_instance_map: Optional[Dict[str, str]] = None, + database_server_hostname_map: Optional[Dict[str, str]] = None, ) -> str: ( upstream_db, @@ -687,8 +708,11 @@ def make_dataset_urn( ) = get_overridden_info( connection_type=self.connection_type, upstream_db=self.database, + upstream_db_id=self.database_id, lineage_overrides=lineage_overrides, platform_instance_map=platform_instance_map, + database_hostname_to_platform_instance_map=database_hostname_to_platform_instance_map, + database_server_hostname_map=database_server_hostname_map, ) table_name = get_fully_qualified_table_name( @@ -706,8 +730,11 @@ def make_dataset_urn( def get_overridden_info( connection_type: Optional[str], upstream_db: Optional[str], + upstream_db_id: Optional[str], platform_instance_map: Optional[Dict[str, str]], lineage_overrides: Optional[TableauLineageOverrides] = None, + database_hostname_to_platform_instance_map: Optional[Dict[str, str]] = None, + database_server_hostname_map: Optional[Dict[str, str]] = None, ) 
-> Tuple[Optional[str], Optional[str], str, str]: original_platform = platform = get_platform(connection_type) if ( @@ -728,6 +755,17 @@ def get_overridden_info( platform_instance = ( platform_instance_map.get(original_platform) if platform_instance_map else None ) + if ( + database_server_hostname_map is not None + and upstream_db_id is not None + and upstream_db_id in database_server_hostname_map + ): + hostname = database_server_hostname_map.get(upstream_db_id) + if ( + database_hostname_to_platform_instance_map is not None + and hostname in database_hostname_to_platform_instance_map + ): + platform_instance = database_hostname_to_platform_instance_map.get(hostname) if original_platform in ("athena", "hive", "mysql"): # Two tier databases upstream_db = None @@ -827,6 +865,7 @@ def get_unique_custom_sql(custom_sql_list: List[dict]) -> List[dict]: # are missing from api result. "isUnsupportedCustomSql": True if not custom_sql.get("tables") else False, "query": custom_sql.get("query"), + "connectionType": custom_sql.get("connectionType"), "columns": custom_sql.get("columns"), "tables": custom_sql.get("tables"), "database": custom_sql.get("database"), diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau_constant.py b/metadata-ingestion/src/datahub/ingestion/source/tableau_constant.py index e80c9d8fd1f25..9ead9a407d957 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau_constant.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau_constant.py @@ -23,6 +23,8 @@ CUSTOM_SQL_TABLE = "CustomSQLTable" UPSTREAM_TABLES = "upstreamTables" DATABASE_TABLES_CONNECTION = "databaseTablesConnection" +DATABASE_SERVERS_CONNECTION = "databaseServersConnection" +HOST_NAME = "hostName" FIELDS = "fields" UPSTREAM_DATA_SOURCES = "upstreamDatasources" COLUMNS = "columns" diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/hive_metastore_proxy.py b/metadata-ingestion/src/datahub/ingestion/source/unity/hive_metastore_proxy.py index 
140698a6c4b10..c99fe3b09c5bb 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/hive_metastore_proxy.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/hive_metastore_proxy.py @@ -332,7 +332,7 @@ def _get_table_info(self, schema_name: str, table_name: str) -> dict: properties[col_name] = data_type.strip() else: # col_name == "", data_type is not None - prop_name = "{} {}".format(active_heading, data_type.rstrip()) + prop_name = f"{active_heading} {data_type.rstrip()}" properties[prop_name] = value.rstrip() except Exception as e: self.report.report_warning( diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index f3aeb34002f3f..f1f0b5ddb4475 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -163,7 +163,7 @@ def get_report(self) -> UnityCatalogReport: return self.report def __init__(self, ctx: PipelineContext, config: UnityCatalogSourceConfig): - super(UnityCatalogSource, self).__init__(config, ctx) + super().__init__(config, ctx) self.config = config self.report: UnityCatalogReport = UnityCatalogReport() diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py new file mode 100644 index 0000000000000..7be8069e1b085 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_domain_based_on_tags.py @@ -0,0 +1,70 @@ +from typing import Dict, List, Optional, Set, cast + +from datahub.configuration.common import ( + TransformerSemantics, + TransformerSemanticsConfigModel, +) +from datahub.emitter.mce_builder import Aspect +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.transformer.dataset_domain import AddDatasetDomain +from datahub.ingestion.transformer.dataset_transformer 
import DatasetDomainTransformer +from datahub.metadata.schema_classes import DomainsClass, GlobalTagsClass + + +class DatasetTagDomainMapperConfig(TransformerSemanticsConfigModel): + domain_mapping: Dict[str, str] + + +class DatasetTagDomainMapper(DatasetDomainTransformer): + """A transformer that appends a predefined set of domains to each dataset that includes specific tags defined in the transformer.""" + + def __init__(self, config: DatasetTagDomainMapperConfig, ctx: PipelineContext): + super().__init__() + self.ctx: PipelineContext = ctx + self.config: DatasetTagDomainMapperConfig = config + + @classmethod + def create( + cls, config_dict: dict, ctx: PipelineContext + ) -> "DatasetTagDomainMapper": + config = DatasetTagDomainMapperConfig.parse_obj(config_dict) + return cls(config, ctx) + + def transform_aspect( + self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect] + ) -> Optional[Aspect]: + # Initialize the existing domain aspect + existing_domain_aspect: DomainsClass = cast(DomainsClass, aspect) + assert self.ctx.graph + global_tags: Optional[GlobalTagsClass] = self.ctx.graph.get_tags(entity_urn) + # Check if we have tags received in existing aspect + if global_tags: + domain_mapping = self.config.domain_mapping + transformer_tags = domain_mapping.keys() + tags_seen: Set[str] = set() + for tag_item in global_tags.tags: + tag = tag_item.tag.split("urn:li:tag:")[-1] + if tag in transformer_tags: + tags_seen.add(tag) + + if tags_seen: + domain_aspect = DomainsClass(domains=[]) + domains_to_add: List[str] = [] + for tag in tags_seen: + if domain_mapping.get(tag): + domains_to_add.append(domain_mapping[tag]) + + mapped_domains = AddDatasetDomain.get_domain_class( + self.ctx.graph, domains_to_add + ) + domain_aspect.domains.extend(mapped_domains.domains) + if self.config.semantics == TransformerSemantics.PATCH: + # Try merging with server-side domains + patch_domain_aspect: Optional[ + DomainsClass + ] = AddDatasetDomain._merge_with_server_domains( + 
self.ctx.graph, entity_urn, domain_aspect + ) + return cast(Optional[Aspect], patch_domain_aspect) + return cast(Optional[Aspect], domain_aspect) + return cast(Optional[Aspect], existing_domain_aspect) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py index 79151f7b11bf0..a78a79141e8e4 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py @@ -123,3 +123,8 @@ def aspect_name(self) -> str: class DatasetDataproductTransformer(DatasetTransformer, metaclass=ABCMeta): def aspect_name(self) -> str: return "dataProductProperties" + + +class DatasetUsageStatisticsTransformer(DatasetTransformer, metaclass=ABCMeta): + def aspect_name(self) -> str: + return "datasetUsageStatistics" diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py b/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py new file mode 100644 index 0000000000000..a3d41c8e91ec5 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_dataset_usage_user.py @@ -0,0 +1,67 @@ +import copy +import re +from typing import Any, Dict, List, Optional, cast + +from datahub.configuration.common import ConfigModel +from datahub.emitter.mce_builder import Aspect +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.transformer.dataset_transformer import ( + DatasetUsageStatisticsTransformer, +) +from datahub.metadata.schema_classes import DatasetUsageStatisticsClass + +_USER_URN_PREFIX: str = "urn:li:corpuser:" + + +class PatternCleanupDatasetUsageUserConfig(ConfigModel): + pattern_for_cleanup: List[str] + + +class PatternCleanupDatasetUsageUser(DatasetUsageStatisticsTransformer): + """Transformer that clean the user URN for DatasetUsageStatistics 
aspect.""" + + ctx: PipelineContext + config: PatternCleanupDatasetUsageUserConfig + + def __init__( + self, + config: PatternCleanupDatasetUsageUserConfig, + ctx: PipelineContext, + **resolver_args: Dict[str, Any], + ): + super().__init__() + self.config = config + self.ctx = ctx + self.resolver_args = resolver_args + + @classmethod + def create( + cls, config_dict: dict, ctx: PipelineContext + ) -> "PatternCleanupDatasetUsageUser": + config = PatternCleanupDatasetUsageUserConfig.parse_obj(config_dict) + return cls(config, ctx) + + def transform_aspect( + self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect] + ) -> Optional[Aspect]: + in_dataset_properties_aspect: DatasetUsageStatisticsClass = cast( + DatasetUsageStatisticsClass, aspect + ) + + if in_dataset_properties_aspect.userCounts is not None: + out_dataset_properties_aspect: DatasetUsageStatisticsClass = copy.deepcopy( + in_dataset_properties_aspect + ) + + if out_dataset_properties_aspect.userCounts is not None: + for user in out_dataset_properties_aspect.userCounts: + user_id: str = user.user.split(_USER_URN_PREFIX)[1] + for value in self.config.pattern_for_cleanup: + cleaned_user_id = re.sub(value, "", user_id) + user.user = _USER_URN_PREFIX + cleaned_user_id + + return cast(Aspect, out_dataset_properties_aspect) + else: + return cast(Aspect, out_dataset_properties_aspect) + else: + return cast(Aspect, in_dataset_properties_aspect) diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_ownership.py b/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_ownership.py index 1e949affd1766..8ef61ab9679e6 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_ownership.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/pattern_cleanup_ownership.py @@ -42,9 +42,9 @@ def _get_current_owner_urns(self, entity_urn: str) -> Set[str]: if self.ctx.graph is not None: current_ownership = 
self.ctx.graph.get_ownership(entity_urn=entity_urn) if current_ownership is not None: - current_owner_urns: Set[str] = set( - [owner.owner for owner in current_ownership.owners] - ) + current_owner_urns: Set[str] = { + owner.owner for owner in current_ownership.owners + } return current_owner_urns else: return set() diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py index f76d145a87043..94501b0d499b7 100644 --- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py +++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py @@ -769,9 +769,7 @@ def make_dataset_urn_from_sqlalchemy_uri( ) return None schema_name = ( - schema_name - if exclude_dbname - else "{}.{}".format(url_instance.database, schema_name) + schema_name if exclude_dbname else f"{url_instance.database}.{schema_name}" ) elif data_platform == "mssql": schema_name = schema_name or "dbo" @@ -781,9 +779,7 @@ def make_dataset_urn_from_sqlalchemy_uri( ) return None schema_name = ( - schema_name - if exclude_dbname - else "{}.{}".format(url_instance.database, schema_name) + schema_name if exclude_dbname else f"{url_instance.database}.{schema_name}" ) elif data_platform in ["trino", "snowflake"]: if schema_name is None or url_instance.database is None: @@ -804,9 +800,7 @@ def make_dataset_urn_from_sqlalchemy_uri( if database_name.endswith(f"/{schema_name}"): database_name = database_name[: -len(f"/{schema_name}")] schema_name = ( - schema_name - if exclude_dbname - else "{}.{}".format(database_name, schema_name) + schema_name if exclude_dbname else f"{database_name}.{schema_name}" ) elif data_platform == "bigquery": @@ -817,7 +811,7 @@ def make_dataset_urn_from_sqlalchemy_uri( ) ) return None - schema_name = "{}.{}".format(url_instance.host, url_instance.database) + schema_name = f"{url_instance.host}.{url_instance.database}" schema_name = schema_name or 
url_instance.database if schema_name is None: @@ -853,7 +847,7 @@ class DecimalEncoder(json.JSONEncoder): def default(self, o): if isinstance(o, Decimal): return str(o) - return super(DecimalEncoder, self).default(o) + return super().default(o) def convert_to_string(var: Any) -> str: diff --git a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py index 5e2e510533af1..ae5d83c2dfc94 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py +++ b/metadata-ingestion/src/datahub/sql_parsing/schema_resolver.py @@ -81,7 +81,7 @@ def includes_temp_tables(self) -> bool: return False def get_urns(self) -> Set[str]: - return set(k for k, v in self._schema_cache.items() if v is not None) + return {k for k, v in self._schema_cache.items() if v is not None} def schema_count(self) -> int: return int( diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index f06ca650bab9e..530764e8320cd 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -51,6 +51,7 @@ ) from datahub.utilities.lossy_collections import LossyDict, LossyList from datahub.utilities.ordered_set import OrderedSet +from datahub.utilities.perf_timer import PerfTimer logger = logging.getLogger(__name__) QueryId = str @@ -156,6 +157,10 @@ class SqlAggregatorReport(Report): default_factory=LossyDict ) + # SQL parsing (over all invocations). + num_sql_parsed: int = 0 + sql_parsing_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer) + # Other lineage loading metrics. 
num_known_query_lineage: int = 0 num_known_mapping_lineage: int = 0 @@ -749,12 +754,14 @@ def _run_sql_parser( timestamp: Optional[datetime] = None, user: Optional[CorpUserUrn] = None, ) -> SqlParsingResult: - parsed = sqlglot_lineage( - query, - schema_resolver=schema_resolver, - default_db=default_db, - default_schema=default_schema, - ) + with self.report.sql_parsing_timer: + parsed = sqlglot_lineage( + query, + schema_resolver=schema_resolver, + default_db=default_db, + default_schema=default_schema, + ) + self.report.num_sql_parsed += 1 # Conditionally log the query. if self.query_log == QueryLogSetting.STORE_ALL or ( diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py index de648ec29b233..c112f5b74ac51 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py @@ -406,10 +406,11 @@ def _schema_aware_fuzzy_column_resolve( return default_col_name # Optimize the statement + qualify column references. - logger.debug( - "Prior to column qualification sql %s", - statement.sql(pretty=True, dialect=dialect), - ) + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Prior to column qualification sql %s", + statement.sql(pretty=True, dialect=dialect), + ) try: # Second time running qualify, this time with: # - the select instead of the full outer statement @@ -434,7 +435,8 @@ def _schema_aware_fuzzy_column_resolve( raise SqlUnderstandingError( f"sqlglot failed to map columns to their source tables; likely missing/outdated table schema info: {e}" ) from e - logger.debug("Qualified sql %s", statement.sql(pretty=True, dialect=dialect)) + if logger.isEnabledFor(logging.DEBUG): + logger.debug("Qualified sql %s", statement.sql(pretty=True, dialect=dialect)) # Handle the create DDL case. if is_create_ddl: @@ -493,6 +495,9 @@ def _schema_aware_fuzzy_column_resolve( # Otherwise, we can't process it. 
continue + if output_col == "": + continue + if is_dialect_instance(dialect, "bigquery") and output_col.lower() in { "_partitiontime", "_partitiondate", @@ -805,7 +810,7 @@ def _sqlglot_lineage_inner( logger.debug("Parsing lineage from sql statement: %s", sql) statement = parse_statement(sql, dialect=dialect) - original_statement = statement.copy() + original_statement, statement = statement, statement.copy() # logger.debug( # "Formatted sql statement: %s", # original_statement.sql(pretty=True, dialect=dialect), @@ -886,9 +891,9 @@ def _sqlglot_lineage_inner( try: if select_statement is not None: with cooperative_timeout( - timeout=SQL_LINEAGE_TIMEOUT_SECONDS - if SQL_LINEAGE_TIMEOUT_ENABLED - else None + timeout=( + SQL_LINEAGE_TIMEOUT_SECONDS if SQL_LINEAGE_TIMEOUT_ENABLED else None + ) ): column_lineage = _column_level_lineage( select_statement, @@ -914,8 +919,8 @@ def _sqlglot_lineage_inner( # TODO: Can we generate a common WHERE clauses section? # Convert TableName to urns. - in_urns = sorted(set(table_name_urn_mapping[table] for table in tables)) - out_urns = sorted(set(table_name_urn_mapping[table] for table in modified)) + in_urns = sorted({table_name_urn_mapping[table] for table in tables}) + out_urns = sorted({table_name_urn_mapping[table] for table in modified}) column_lineage_urns = None if column_lineage: column_lineage_urns = [ diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py index c7cf975a3a953..dfb3b8925dcca 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py @@ -1,12 +1,17 @@ +import functools import hashlib import logging from typing import Dict, Iterable, Optional, Tuple, Union import sqlglot import sqlglot.errors +import sqlglot.optimizer.eliminate_ctes logger = logging.getLogger(__name__) DialectOrStr = Union[sqlglot.Dialect, str] +SQL_PARSE_CACHE_SIZE = 1000 + 
+FORMAT_QUERY_CACHE_SIZE = 1000 def _get_dialect_str(platform: str) -> str: @@ -55,7 +60,8 @@ def is_dialect_instance( return False -def parse_statement( +@functools.lru_cache(maxsize=SQL_PARSE_CACHE_SIZE) +def _parse_statement( sql: sqlglot.exp.ExpOrStr, dialect: sqlglot.Dialect ) -> sqlglot.Expression: statement: sqlglot.Expression = sqlglot.maybe_parse( @@ -64,6 +70,16 @@ def parse_statement( return statement +def parse_statement( + sql: sqlglot.exp.ExpOrStr, dialect: sqlglot.Dialect +) -> sqlglot.Expression: + # Parsing is significantly more expensive than copying the expression. + # Because the expressions are mutable, we don't want to allow the caller + # to modify the parsed expression that sits in the cache. We keep + # the cached versions pristine by returning a copy on each call. + return _parse_statement(sql, dialect).copy() + + def parse_statements_and_pick(sql: str, platform: DialectOrStr) -> sqlglot.Expression: dialect = get_dialect(platform) statements = [ @@ -200,6 +216,7 @@ def get_query_fingerprint( return get_query_fingerprint_debug(expression, platform)[0] +@functools.lru_cache(maxsize=FORMAT_QUERY_CACHE_SIZE) def try_format_query( expression: sqlglot.exp.ExpOrStr, platform: DialectOrStr, raises: bool = False ) -> str: @@ -277,4 +294,23 @@ def replace_cte_refs(node: sqlglot.exp.Expression) -> sqlglot.exp.Expression: else: return node - return statement.transform(replace_cte_refs, copy=False) + statement = statement.copy() + statement = statement.transform(replace_cte_refs, copy=False) + + # There's a bug in eliminate_ctes that causes it to not remove all unused CTEs + # when there's a complex chain of dependent CTEs. As a workaround, we call the + # method multiple times until it no longer eliminates any CTEs. 
+ max_eliminate_calls = 5 + for iteration in range(max_eliminate_calls): + new_statement = sqlglot.optimizer.eliminate_ctes.eliminate_ctes( + statement.copy() + ) + if new_statement == statement: + if iteration > 1: + logger.debug( + f"Required {iteration+1} iterations to detach and eliminate all CTEs" + ) + break + statement = new_statement + + return statement diff --git a/metadata-ingestion/src/datahub/telemetry/telemetry.py b/metadata-ingestion/src/datahub/telemetry/telemetry.py index 08df9e80ecf29..69a790b3d9bc7 100644 --- a/metadata-ingestion/src/datahub/telemetry/telemetry.py +++ b/metadata-ingestion/src/datahub/telemetry/telemetry.py @@ -174,7 +174,7 @@ def update_config(self) -> bool: indent=2, ) return True - except IOError as x: + except OSError as x: if x.errno == errno.ENOENT: logger.debug( f"{CONFIG_FILE} does not exist and could not be created. Please check permissions on the parent folder." @@ -215,12 +215,12 @@ def load_config(self) -> bool: """ try: - with open(CONFIG_FILE, "r") as f: + with open(CONFIG_FILE) as f: config = json.load(f) self.client_id = config["client_id"] self.enabled = config["enabled"] & ENV_ENABLED return True - except IOError as x: + except OSError as x: if x.errno == errno.ENOENT: logger.debug( f"{CONFIG_FILE} does not exist and could not be created. Please check permissions on the parent folder." 
diff --git a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py index 91f5d6f914676..90d80dbeec8b2 100644 --- a/metadata-ingestion/src/datahub/testing/compare_metadata_json.py +++ b/metadata-ingestion/src/datahub/testing/compare_metadata_json.py @@ -5,6 +5,7 @@ import os import pathlib import pprint +import re import shutil import tempfile from typing import Any, Dict, List, Sequence, Union @@ -40,6 +41,7 @@ def assert_metadata_files_equal( update_golden: bool, copy_output: bool, ignore_paths: Sequence[str] = (), + ignore_paths_v2: Sequence[str] = (), ignore_order: bool = True, ) -> None: golden_exists = os.path.isfile(golden_path) @@ -70,6 +72,16 @@ def assert_metadata_files_equal( logger.info(f"Error reformatting golden file as MCP/MCEs: {e}") golden = load_json_file(golden_path) + if ignore_paths_v2: + golden_json = load_json_file(golden_path) + for i, obj in enumerate(golden_json): + aspect_json = obj.get("aspect", {}).get("json", []) + for j, item in enumerate(aspect_json): + if isinstance(item, dict): + if item.get("path") in ignore_paths_v2: + json_path = f"root[{i}]['aspect']['json'][{j}]['value']" + ignore_paths = (*ignore_paths, re.escape(json_path)) + diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order) if diff and update_golden: if isinstance(diff, MCPDiff): diff --git a/metadata-ingestion/src/datahub/utilities/delta.py b/metadata-ingestion/src/datahub/utilities/delta.py new file mode 100644 index 0000000000000..281eec4310c89 --- /dev/null +++ b/metadata-ingestion/src/datahub/utilities/delta.py @@ -0,0 +1,34 @@ +from typing import Any + + +def delta_type_to_hive_type(field_type: Any) -> str: + if isinstance(field_type, str): + """ + return the field type + """ + return field_type + else: + if field_type.get("type") == "array": + """ + if array is of complex type, recursively parse the + fields and create the native datatype + """ + return ( + 
"array<" + delta_type_to_hive_type(field_type.get("elementType")) + ">" + ) + elif field_type.get("type") == "struct": + parsed_struct = "" + for field in field_type.get("fields"): + """ + if field is of complex type, recursively parse + and create the native datatype + """ + parsed_struct += ( + "{}:{}".format( + field.get("name"), + delta_type_to_hive_type(field.get("type")), + ) + + "," + ) + return "struct<" + parsed_struct.rstrip(",") + ">" + return "" diff --git a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py index d264a3970fdde..bb2b827dc06c3 100644 --- a/metadata-ingestion/src/datahub/utilities/file_backed_collections.py +++ b/metadata-ingestion/src/datahub/utilities/file_backed_collections.py @@ -15,6 +15,7 @@ Any, Callable, Dict, + Final, Generic, Iterator, List, @@ -28,8 +29,6 @@ Union, ) -from typing_extensions import Final - from datahub.ingestion.api.closeable import Closeable logger: logging.Logger = logging.getLogger(__name__) diff --git a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py index 4fcef990ae4f4..e98fe42c1d56c 100644 --- a/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py +++ b/metadata-ingestion/src/datahub/utilities/hive_schema_to_avro.py @@ -28,6 +28,21 @@ class HiveColumnToAvroConverter: "bigint": "long", "varchar": "string", "char": "string", + "long": "long", + "bytes": "bytes", + } + _EXTRA_BIGQUERY_TYPE_TO_AVRO_TYPE = { + # A few extra types, purely to map BigQuery things correctly. 
+ "bool": "boolean", + "decimal": "double", + "numeric": "int", + "bignumeric": "long", + "bigdecimal": "double", + "float64": "double", + "int64": "long", + "smallint": "int", + "tinyint": "int", + "byteint": "int", } _COMPLEX_TYPE = re.compile("^(struct|map|array|uniontype)") @@ -57,10 +72,8 @@ def _parse_datatype_string( parts = HiveColumnToAvroConverter._ignore_brackets_split(s[4:-1], ",") if len(parts) != 2: raise ValueError( - ( - "The map type string format is: 'map', " - + f"but got: {s}" - ) + "The map type string format is: 'map', " + + f"but got: {s}" ) kt = HiveColumnToAvroConverter._parse_datatype_string(parts[0]) @@ -112,10 +125,8 @@ def _parse_struct_fields_string(s: str, **kwargs: Any) -> Dict[str, object]: ) if len(name_and_type) != 2: raise ValueError( - ( - "The struct field string format is: 'field_name:field_type', " - + f"but got: {part}" - ) + "The struct field string format is: 'field_name:field_type', " + + f"but got: {part}" ) field_name = name_and_type[0].strip() @@ -180,13 +191,19 @@ def _parse_basic_datatype_string(s: str) -> Dict[str, object]: "native_data_type": s, "_nullable": True, } - elif s == "timestamp": + elif s in {"timestamp", "datetime"}: return { "type": "int", "logicalType": "timestamp-millis", "native_data_type": s, "_nullable": True, } + elif s in HiveColumnToAvroConverter._EXTRA_BIGQUERY_TYPE_TO_AVRO_TYPE: + return { + "type": HiveColumnToAvroConverter._EXTRA_BIGQUERY_TYPE_TO_AVRO_TYPE[s], + "native_data_type": s, + "_nullable": True, + } else: return {"type": "null", "native_data_type": s, "_nullable": True} diff --git a/metadata-ingestion/src/datahub/utilities/lossy_collections.py b/metadata-ingestion/src/datahub/utilities/lossy_collections.py index bf129adda5e7d..6f5f4bda369a0 100644 --- a/metadata-ingestion/src/datahub/utilities/lossy_collections.py +++ b/metadata-ingestion/src/datahub/utilities/lossy_collections.py @@ -1,5 +1,5 @@ import random -from typing import Dict, Generic, Iterator, List, Set, TypeVar, Union 
+from typing import Dict, Generic, Iterable, Iterator, List, Set, TypeVar, Union from datahub.configuration.pydantic_migration_helpers import PYDANTIC_VERSION_2 @@ -31,6 +31,10 @@ def append(self, __object: T) -> None: finally: self.total_elements += 1 + def extend(self, __iterable: Iterable[T]) -> None: + for item in __iterable: + self.append(item) + def __len__(self) -> int: return self.total_elements diff --git a/metadata-ingestion/src/datahub/utilities/urn_encoder.py b/metadata-ingestion/src/datahub/utilities/urn_encoder.py index b39dd04370682..88c0a128b8e46 100644 --- a/metadata-ingestion/src/datahub/utilities/urn_encoder.py +++ b/metadata-ingestion/src/datahub/utilities/urn_encoder.py @@ -15,7 +15,10 @@ def encode_string_array(arr: List[str]) -> List[str]: @staticmethod def encode_string(s: str) -> str: - return "".join([UrnEncoder.encode_char(c) for c in s]) + if not UrnEncoder.contains_reserved_char(s): + # Fast path for the common case, where no encoding is needed. + return s + return "".join(UrnEncoder.encode_char(c) for c in s) @staticmethod def encode_char(c: str) -> str: diff --git a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py index 3389a6fb05ee8..5bef17119675e 100644 --- a/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py +++ b/metadata-ingestion/src/datahub/utilities/urns/urn_iter.py @@ -118,14 +118,15 @@ def _modify_at_path( assert isinstance(model, list) model[path[0]] = new_value elif isinstance(model, DictWrapper): - model._inner_dict[path[0]] = new_value + setattr(model, path[0], new_value) else: # MCPW setattr(model, path[0], new_value) elif isinstance(path[0], int): assert isinstance(model, list) _modify_at_path(model[path[0]], path[1:], new_value) elif isinstance(model, DictWrapper): - _modify_at_path(model._inner_dict[path[0]], path[1:], new_value) + item = getattr(model, path[0]) + _modify_at_path(item, path[1:], new_value) else: # MCPW 
_modify_at_path(getattr(model, path[0]), path[1:], new_value) diff --git a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py index e79bbbe995aae..26511d9e5df1a 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py +++ b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py @@ -55,7 +55,7 @@ def test_bigquery_v2_ingest( tmp_path, ): test_resources_dir = pytestconfig.rootpath / "tests/integration/bigquery_v2" - mcp_golden_path = "{}/bigquery_mcp_golden.json".format(test_resources_dir) + mcp_golden_path = f"{test_resources_dir}/bigquery_mcp_golden.json" mcp_output_path = "{}/{}".format(tmp_path, "bigquery_mcp_output.json") get_datasets_for_project_id.return_value = [ diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index 5f7d65f5b2377..941315fcfa9d5 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -232,13 +232,13 @@ def test_dbt_ingest( config: DbtTestConfig = dbt_test_config test_resources_dir = pytestconfig.rootpath / "tests/integration/dbt" - with open(test_resources_dir / "dbt_manifest.json", "r") as f: + with open(test_resources_dir / "dbt_manifest.json") as f: requests_mock.get("http://some-external-repo/dbt_manifest.json", text=f.read()) - with open(test_resources_dir / "dbt_catalog.json", "r") as f: + with open(test_resources_dir / "dbt_catalog.json") as f: requests_mock.get("http://some-external-repo/dbt_catalog.json", text=f.read()) - with open(test_resources_dir / "dbt_sources.json", "r") as f: + with open(test_resources_dir / "dbt_sources.json") as f: requests_mock.get("http://some-external-repo/dbt_sources.json", text=f.read()) config.set_paths( diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json 
b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json index c8bf54efa46c2..fbf4578ef6589 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json @@ -96,11 +96,11 @@ "jsonProps": "{\"native_data_type\": \"struct<_1:long,_2:string>\"}" }, { - "fieldPath": "[version=2.0].[type=struct].[type=struct].data.[type=struct]._2.[type=null]._1", + "fieldPath": "[version=2.0].[type=struct].[type=struct].data.[type=struct]._2.[type=long]._1", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, "nativeDataType": "long", @@ -896,11 +896,11 @@ "jsonProps": "{\"native_data_type\": \"struct<_1:long,_2:string>\"}" }, { - "fieldPath": "[version=2.0].[type=struct].[type=struct].data.[type=struct]._2.[type=null]._1", + "fieldPath": "[version=2.0].[type=struct].[type=struct].data.[type=struct]._2.[type=long]._1", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, "nativeDataType": "long", @@ -1491,11 +1491,11 @@ "jsonProps": "{\"native_data_type\": \"struct<_1:long,_2:string>\"}" }, { - "fieldPath": "[version=2.0].[type=struct].[type=struct].data.[type=struct]._1.[type=null]._1", + "fieldPath": "[version=2.0].[type=struct].[type=struct].data.[type=struct]._1.[type=long]._1", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, "nativeDataType": "long", @@ -1530,11 +1530,11 @@ "jsonProps": "{\"native_data_type\": \"struct<_1:long,_2:string>\"}" }, { - "fieldPath": "[version=2.0].[type=struct].[type=struct].data.[type=struct]._2.[type=null]._1", + "fieldPath": 
"[version=2.0].[type=struct].[type=struct].data.[type=struct]._2.[type=long]._1", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NullType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, "nativeDataType": "long", @@ -1717,7 +1717,7 @@ "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=struct].[type=array].[type=array].[type=array].[type=null].data", + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=array].[type=array].[type=long].data", "nullable": false, "type": { "type": { @@ -1796,397 +1796,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_nested_array_of_numbers,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - }, - { - "id": "urn:li:container:189046201d696e7810132cfa64dad337", - "urn": "urn:li:container:189046201d696e7810132cfa64dad337" - }, - { - "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", - "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" - }, - { - "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", - "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" - }, - { - "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", - "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" - }, - { - "id": "urn:li:container:401e53437a2ce6094ab3021cb32919d9", - "urn": "urn:li:container:401e53437a2ce6094ab3021cb32919d9" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": 
"container", - "entityUrn": "urn:li:container:189046201d696e7810132cfa64dad337", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:acf0f3806f475a7397ee745329ef2967", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - }, - { - "id": "urn:li:container:189046201d696e7810132cfa64dad337", - "urn": "urn:li:container:189046201d696e7810132cfa64dad337" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:1876d057d0ee364677b85427342e2c82", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - }, - { - "id": "urn:li:container:189046201d696e7810132cfa64dad337", - "urn": "urn:li:container:189046201d696e7810132cfa64dad337" - }, - { - "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", - "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": 
"container", - "entityUrn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - }, - { - "id": "urn:li:container:189046201d696e7810132cfa64dad337", - "urn": "urn:li:container:189046201d696e7810132cfa64dad337" - }, - { - "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", - "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" - }, - { - "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", - "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:401e53437a2ce6094ab3021cb32919d9", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - }, - { - "id": "urn:li:container:189046201d696e7810132cfa64dad337", - "urn": "urn:li:container:189046201d696e7810132cfa64dad337" - }, - { - "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", - "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" - }, - { - "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", - "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" - }, - { - "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", - "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct_1,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - }, - { - "id": "urn:li:container:189046201d696e7810132cfa64dad337", - "urn": "urn:li:container:189046201d696e7810132cfa64dad337" - }, - { - "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", - "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" - }, - { - "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", - "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" - }, - { - "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", - "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" - }, - { - "id": "urn:li:container:401e53437a2ce6094ab3021cb32919d9", - "urn": "urn:li:container:401e53437a2ce6094ab3021cb32919d9" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - }, - { - "id": "urn:li:container:189046201d696e7810132cfa64dad337", - "urn": "urn:li:container:189046201d696e7810132cfa64dad337" - }, - { - "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", - "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" 
- }, - { - "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", - "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" - }, - { - "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", - "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" - }, - { - "id": "urn:li:container:401e53437a2ce6094ab3021cb32919d9", - "urn": "urn:li:container:401e53437a2ce6094ab3021cb32919d9" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_array,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - }, - { - "id": "urn:li:container:189046201d696e7810132cfa64dad337", - "urn": "urn:li:container:189046201d696e7810132cfa64dad337" - }, - { - "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", - "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" - }, - { - "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", - "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" - }, - { - "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", - "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" - }, - { - "id": "urn:li:container:401e53437a2ce6094ab3021cb32919d9", - "urn": "urn:li:container:401e53437a2ce6094ab3021cb32919d9" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_array_of_struct,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - }, - { - "id": "urn:li:container:189046201d696e7810132cfa64dad337", - "urn": "urn:li:container:189046201d696e7810132cfa64dad337" - }, - { - "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", - "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" - }, - { - "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", - "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" - }, - { - "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", - "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" - }, - { - "id": "urn:li:container:401e53437a2ce6094ab3021cb32919d9", - "urn": "urn:li:container:401e53437a2ce6094ab3021cb32919d9" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct_2,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)", - "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:delta-lake,my-platform)" - }, - { - "id": "urn:li:container:189046201d696e7810132cfa64dad337", - "urn": "urn:li:container:189046201d696e7810132cfa64dad337" - }, - { - "id": "urn:li:container:acf0f3806f475a7397ee745329ef2967", - "urn": "urn:li:container:acf0f3806f475a7397ee745329ef2967" 
- }, - { - "id": "urn:li:container:1876d057d0ee364677b85427342e2c82", - "urn": "urn:li:container:1876d057d0ee364677b85427342e2c82" - }, - { - "id": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4", - "urn": "urn:li:container:7888b6dab77b7e77709699c9a1b81aa4" - }, - { - "id": "urn:li:container:401e53437a2ce6094ab3021cb32919d9", - "urn": "urn:li:container:401e53437a2ce6094ab3021cb32919d9" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "tables_with_nested_datatypes.json", - "lastRunId": "no-run-id-provided" - } -}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_nested_array_of_numbers,UAT)", diff --git a/metadata-ingestion/tests/integration/dynamodb/test_dynamodb.py b/metadata-ingestion/tests/integration/dynamodb/test_dynamodb.py index 33ecd0dcd7e07..4edbbbb3ffc64 100644 --- a/metadata-ingestion/tests/integration/dynamodb/test_dynamodb.py +++ b/metadata-ingestion/tests/integration/dynamodb/test_dynamodb.py @@ -68,6 +68,8 @@ def test_dynamodb(pytestconfig, tmp_path): "config": { "aws_access_key_id": "test", "aws_secret_access_key": "test", + "aws_session_token": "test", + "aws_region": "us-west-2", }, }, "sink": { @@ -97,6 +99,8 @@ def test_dynamodb(pytestconfig, tmp_path): "platform_instance": "dynamodb_test", "aws_access_key_id": "test", "aws_secret_access_key": "test", + "aws_session_token": "test", + "aws_region": "us-west-2", "classification": ClassificationConfig( enabled=True, classifiers=[ diff --git a/metadata-ingestion/tests/integration/fivetran/test_fivetran.py b/metadata-ingestion/tests/integration/fivetran/test_fivetran.py index dbfe1011a41fa..de1e5543f4be6 100644 --- a/metadata-ingestion/tests/integration/fivetran/test_fivetran.py +++ b/metadata-ingestion/tests/integration/fivetran/test_fivetran.py @@ -41,9 +41,10 @@ def default_query_results( return [] elif query == 
fivetran_log_query.get_connectors_query(): return connector_query_results - elif query == fivetran_log_query.get_table_lineage_query("calendar_elected"): + elif query == fivetran_log_query.get_table_lineage_query(): return [ { + "connector_id": "calendar_elected", "source_table_id": "10040", "source_table_name": "employee", "source_schema_name": "public", @@ -52,6 +53,7 @@ def default_query_results( "destination_schema_name": "postgres_public", }, { + "connector_id": "calendar_elected", "source_table_id": "10041", "source_table_name": "company", "source_schema_name": "public", @@ -60,15 +62,29 @@ def default_query_results( "destination_schema_name": "postgres_public", }, ] - elif query == fivetran_log_query.get_column_lineage_query( - "10040", "7779" - ) or query == fivetran_log_query.get_column_lineage_query("10041", "7780"): + elif query == fivetran_log_query.get_column_lineage_query(): return [ { + "source_table_id": "10040", + "destination_table_id": "7779", + "source_column_name": "id", + "destination_column_name": "id", + }, + { + "source_table_id": "10040", + "destination_table_id": "7779", + "source_column_name": "name", + "destination_column_name": "name", + }, + { + "source_table_id": "10041", + "destination_table_id": "7780", "source_column_name": "id", "destination_column_name": "id", }, { + "source_table_id": "10041", + "destination_table_id": "7780", "source_column_name": "name", "destination_column_name": "name", }, @@ -82,46 +98,63 @@ def default_query_results( "email": "abc.xyz@email.com", } ] - elif query == fivetran_log_query.get_sync_start_logs_query("calendar_elected"): + elif query == fivetran_log_query.get_sync_logs_query(): return [ { - "time_stamp": datetime.datetime(2023, 9, 20, 6, 37, 32, 606000), + "connector_id": "calendar_elected", "sync_id": "4c9a03d6-eded-4422-a46a-163266e58243", + "message_event": "sync_start", + "message_data": None, + "time_stamp": datetime.datetime(2023, 9, 20, 6, 37, 32, 606000), }, { - "time_stamp": 
datetime.datetime(2023, 10, 3, 14, 35, 30, 345000), + "connector_id": "calendar_elected", "sync_id": "f773d1e9-c791-48f4-894f-8cf9b3dfc834", + "message_event": "sync_start", + "message_data": None, + "time_stamp": datetime.datetime(2023, 10, 3, 14, 35, 30, 345000), }, { - "time_stamp": datetime.datetime(2023, 10, 3, 14, 35, 55, 401000), + "connector_id": "calendar_elected", "sync_id": "63c2fc85-600b-455f-9ba0-f576522465be", + "message_event": "sync_start", + "message_data": None, + "time_stamp": datetime.datetime(2023, 10, 3, 14, 35, 55, 401000), }, { - "time_stamp": datetime.datetime(2023, 10, 3, 14, 37, 5, 403000), + "connector_id": "calendar_elected", "sync_id": "e773e1e9-c791-46f4-894f-8ch9b3dfc832", + "message_event": "sync_start", + "message_data": None, + "time_stamp": datetime.datetime(2023, 10, 3, 14, 37, 5, 403000), }, - ] - elif query == fivetran_log_query.get_sync_end_logs_query("calendar_elected"): - return [ { - "time_stamp": datetime.datetime(2023, 9, 20, 6, 38, 5, 56000), + "connector_id": "calendar_elected", "sync_id": "4c9a03d6-eded-4422-a46a-163266e58243", + "message_event": "sync_end", "message_data": '"{\\"status\\":\\"SUCCESSFUL\\"}"', + "time_stamp": datetime.datetime(2023, 9, 20, 6, 38, 5, 56000), }, { - "time_stamp": datetime.datetime(2023, 10, 3, 14, 35, 31, 512000), + "connector_id": "calendar_elected", "sync_id": "f773d1e9-c791-48f4-894f-8cf9b3dfc834", + "message_event": "sync_end", "message_data": '"{\\"reason\\":\\"Sync has been cancelled because of a user action in the dashboard.Standard Config updated.\\",\\"status\\":\\"CANCELED\\"}"', + "time_stamp": datetime.datetime(2023, 10, 3, 14, 35, 31, 512000), }, { - "time_stamp": datetime.datetime(2023, 10, 3, 14, 36, 29, 678000), + "connector_id": "calendar_elected", "sync_id": "63c2fc85-600b-455f-9ba0-f576522465be", + "message_event": "sync_end", "message_data": '"{\\"reason\\":\\"java.lang.RuntimeException: FATAL: too many connections for role 
\\\\\\"hxwraqld\\\\\\"\\",\\"taskType\\":\\"reconnect\\",\\"status\\":\\"FAILURE_WITH_TASK\\"}"', + "time_stamp": datetime.datetime(2023, 10, 3, 14, 36, 29, 678000), }, { - "time_stamp": datetime.datetime(2023, 10, 3, 14, 37, 35, 478000), + "connector_id": "calendar_elected", "sync_id": "e773e1e9-c791-46f4-894f-8ch9b3dfc832", + "message_event": "sync_end", "message_data": None, + "time_stamp": datetime.datetime(2023, 10, 3, 14, 37, 35, 478000), }, ] # Unreachable code diff --git a/metadata-ingestion/tests/integration/git/test_git_clone.py b/metadata-ingestion/tests/integration/git/test_git_clone.py index cf1f649825e0c..773e84cbf7488 100644 --- a/metadata-ingestion/tests/integration/git/test_git_clone.py +++ b/metadata-ingestion/tests/integration/git/test_git_clone.py @@ -123,15 +123,13 @@ def test_git_clone_private(tmp_path): branch="d380a2b777ec6f4653626f39c68dba85893faa74", ) assert checkout_dir.exists() - assert set(os.listdir(checkout_dir)) == set( - [ - ".datahub", - "models", - "README.md", - ".github", - ".git", - "views", - "manifest_lock.lkml", - "manifest.lkml", - ] - ) + assert set(os.listdir(checkout_dir)) == { + ".datahub", + "models", + "README.md", + ".github", + ".git", + "views", + "manifest_lock.lkml", + "manifest.lkml", + } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json index 3b07d651d0dcf..2fad0643e5027 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_1.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + 
{ + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", 
+ "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + 
"value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { 
+ "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + 
"changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": 
"datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 
+1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json index 8dfed3de760cc..58e1e11c8dd76 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_2.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + 
"systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": 
"hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": 
"no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": 
"hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + 
"value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": 
"add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json index b88149cd333e9..78db506868679 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_3.json @@ -211,6 
+211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of 
structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", 
- "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": 
"0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,metastore.db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, 
"type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json index aecb60f6347d3..193e1e23b9de4 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_4.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + 
"value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": 
"/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + 
{ + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + 
"path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 +1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + 
"value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", 
+ "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:presto-on-hive,metastore.db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + 
"path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "[version=2.0].[type=int].foo", + "fieldPath": "[version=2.0].[type=string].baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "[version=2.0].[type=string].baz", + "fieldPath": "[version=2.0].[type=int].foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "[version=2.0].[type=string].bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - 
"name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json index dff32615d1bdf..ce7ebdd299579 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json +++ b/metadata-ingestion/tests/integration/hive-metastore/hive_metastore_mces_golden_5.json @@ -211,6 +211,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.map_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "map_test" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -272,23 +337,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": 
"0", - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/map_test", - "create_date": "2023-11-24" - }, - "name": "map_test", - "tags": [] - } } ] } @@ -358,6 +406,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.union_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "union_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258696" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -476,23 +589,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "numRows": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - 
"transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/union_test", - "create_date": "2023-11-24" - }, - "name": "union_test", - "tags": [] - } } ] } @@ -562,6 +658,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.nested_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258695" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -672,23 +833,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "totalSize": "0", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "transient_lastDdlTime": "1700805676", - "rawDataSize": "0", - 
"numRows": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/nested_struct_test", - "create_date": "2023-11-24" - }, - "name": "nested_struct_test", - "tags": [] - } } ] } @@ -758,6 +902,86 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.array_struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "array_struct_test" + }, + { + "op": "add", + "path": "/description", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/another.comment", + "value": "This table has no partitions" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "1" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "32" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258689" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "33" + }, + { + "op": "add", + "path": "/customProperties/comment", + "value": "This table has array of structs" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -852,26 
+1076,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "another.comment": "This table has no partitions", - "numRows": "1", - "rawDataSize": "32", - "totalSize": "33", - "numFiles": "1", - "transient_lastDdlTime": "1700805674", - "comment": "This table has array of structs", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/array_struct_test", - "create_date": "2023-11-24" - }, - "name": "array_struct_test", - "description": "This table has array of structs", - "tags": [] - } } ] } @@ -941,6 +1145,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.struct_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "struct_test" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { 
"proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1029,23 +1298,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/struct_test", - "create_date": "2023-11-24" - }, - "name": "struct_test", - "tags": [] - } } ] } @@ -1115,6 +1367,71 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1._test_table_underscore,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/numRows", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/numFiles", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/COLUMN_STATS_ACCURATE", + "value": "{\"BASIC_STATS\":\"true\"}" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258680" + }, + { + "op": "add", + "path": "/customProperties/rawDataSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/totalSize", + "value": "0" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1173,23 +1490,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805671", - "numFiles": "0", - "COLUMN_STATS_ACCURATE": "{\"BASIC_STATS\":\"true\"}", - "rawDataSize": "0", - "numRows": "0", - "totalSize": "0", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/_test_table_underscore", - "create_date": "2023-11-24" - }, - "name": "_test_table_underscore", - "tags": [] - } } ] } @@ -1259,6 +1559,51 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,db1.pokes,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/name", + "value": "pokes" + }, + { + "op": "add", + "path": "/customProperties/transient_lastDdlTime", + "value": "1715258672" + }, + { + "op": "add", + "path": "/customProperties/table_type", + "value": "MANAGED_TABLE" + }, + { + "op": "add", + "path": "/customProperties/table_location", + "value": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes" + }, + { + "op": "add", + "path": "/customProperties/create_date", + "value": "2024-05-09" + }, + { + "op": "add", + "path": "/customProperties/partitioned_columns", + "value": "baz" + } + ] + }, + "systemMetadata": { + "lastObserved": 1632398400000, + "runId": "hive-metastore-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -1290,31 +1635,31 @@ }, "fields": [ { - "fieldPath": "foo", + "fieldPath": "baz", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} + "com.linkedin.pegasus2avro.schema.StringType": {} } }, - "nativeDataType": "int", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false, - "jsonProps": 
"{\"native_data_type\": \"int\", \"_nullable\": true}" + "isPartitioningKey": true, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" }, { - "fieldPath": "baz", + "fieldPath": "foo", "nullable": true, "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "int", "recursive": false, "isPartOfKey": false, - "isPartitioningKey": true, - "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + "jsonProps": "{\"native_data_type\": \"int\", \"_nullable\": true}" }, { "fieldPath": "bar", @@ -1331,19 +1676,6 @@ } ] } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "transient_lastDdlTime": "1700805669", - "table_type": "MANAGED_TABLE", - "table_location": "hdfs://namenode:8020/user/hive/warehouse/db1.db/pokes", - "create_date": "2023-11-24", - "partitioned_columns": "baz" - }, - "name": "pokes", - "tags": [] - } } ] } diff --git a/metadata-ingestion/tests/integration/hive-metastore/test_hive_metastore.py b/metadata-ingestion/tests/integration/hive-metastore/test_hive_metastore.py index 2ed0e6198dc00..dbc1d0706c4b6 100644 --- a/metadata-ingestion/tests/integration/hive-metastore/test_hive_metastore.py +++ b/metadata-ingestion/tests/integration/hive-metastore/test_hive_metastore.py @@ -1,6 +1,6 @@ import re import subprocess -from typing import Dict +from typing import Dict, Sequence import pytest import requests @@ -120,18 +120,28 @@ def test_hive_metastore_ingest( # config_file = (test_resources_dir / "presto_on_hive_to_file.yml").resolve() # run_datahub_cmd(["ingest", "-c", f"{config_file}"]) + ignore_paths: Sequence[str] = [ + r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['transient_lastDdlTime'\]", + 
r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['numfiles'\]", + r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['totalsize'\]", + r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['create_date'\]", + ] + + ignore_paths_v2: Sequence[str] = [ + "/customProperties/create_date", + "/customProperties/transient_lastDdlTime", + "/customProperties/numfiles", + "/customProperties/totalsize", + ] + # Verify the output. mce_helpers.check_golden_file( pytestconfig, output_path=f"hive_metastore_mces{test_suffix}.json", golden_path=test_resources_dir / f"hive_metastore_mces_golden{test_suffix}.json", - ignore_paths=[ - r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['transient_lastDdlTime'\]", - r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['numfiles'\]", - r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['totalsize'\]", - r"root\[\d+\]\['proposedSnapshot'\]\['com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot'\]\['aspects'\]\[\d+\]\['com.linkedin.pegasus2avro.dataset.DatasetProperties'\]\['customProperties'\]\['create_date'\]", - ], + ignore_paths=ignore_paths, + 
ignore_paths_v2=ignore_paths_v2, ) diff --git a/metadata-ingestion/tests/integration/iceberg/test_iceberg.py b/metadata-ingestion/tests/integration/iceberg/test_iceberg.py index a9ab43169405d..24a636077bfdd 100644 --- a/metadata-ingestion/tests/integration/iceberg/test_iceberg.py +++ b/metadata-ingestion/tests/integration/iceberg/test_iceberg.py @@ -31,9 +31,7 @@ def remove_docker_image(): def spark_submit(file_path: str, args: str = "") -> None: docker = "docker" command = f"{docker} exec spark-iceberg spark-submit {file_path} {args}" - ret = subprocess.run( - command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) + ret = subprocess.run(command, shell=True, capture_output=True) assert ret.returncode == 0 diff --git a/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_mces_golden.json b/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_mces_golden.json index 1c7f481e7063e..f992ce6e60ddd 100644 --- a/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_mces_golden.json +++ b/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_mces_golden.json @@ -756,16 +756,13 @@ } }, { - "entityType": "dataJob", - "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,mysql_source4,PROD),unknown_source.query-topic)", + "entityType": "dataFlow", + "entityUrn": "urn:li:dataFlow:(kafka-connect,debezium-mysql-connector,PROD)", "changeType": "UPSERT", - "aspectName": "dataJobInputOutput", + "aspectName": "status", "aspect": { "json": { - "inputDatasets": [], - "outputDatasets": [ - "urn:li:dataset:(urn:li:dataPlatform:kafka,query-topic,PROD)" - ] + "removed": false } }, "systemMetadata": { @@ -774,13 +771,16 @@ } }, { - "entityType": "dataFlow", - "entityUrn": "urn:li:dataFlow:(kafka-connect,debezium-mysql-connector,PROD)", + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(kafka-connect,mysql_source4,PROD),unknown_source.query-topic)", "changeType": "UPSERT", - "aspectName": "status", 
+ "aspectName": "dataJobInputOutput", "aspect": { "json": { - "removed": false + "inputDatasets": [], + "outputDatasets": [ + "urn:li:dataset:(urn:li:dataPlatform:kafka,query-topic,PROD)" + ] } }, "systemMetadata": { diff --git a/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_to_file.yml b/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_to_file.yml index f2d8dd7b860b7..4946cae8c4859 100644 --- a/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_to_file.yml +++ b/metadata-ingestion/tests/integration/kafka-connect/kafka_connect_to_file.yml @@ -11,6 +11,12 @@ source: - source_mongodb_connector - confluent_s3_sink_connector provided_configs: + - provider: env + path_key: MYSQL_PORT + value: 3306 + - provider: env + path_key: MYSQL_DB + value: librarydb - provider: env path_key: MYSQL_CONNECTION_URL value: jdbc:mysql://test_mysql:3306/librarydb diff --git a/metadata-ingestion/tests/integration/kafka-connect/setup/connect.env b/metadata-ingestion/tests/integration/kafka-connect/setup/connect.env index 40d5dcfc9a91a..204e7548b3374 100644 --- a/metadata-ingestion/tests/integration/kafka-connect/setup/connect.env +++ b/metadata-ingestion/tests/integration/kafka-connect/setup/connect.env @@ -23,5 +23,7 @@ CONNECT_PLUGIN_PATH=/usr/share/confluent-hub-components, /usr/local/share/kafka/ CONNECT_CONFIG_PROVIDERS=env CONNECT_CONFIG_PROVIDERS_ENV_CLASS=io.strimzi.kafka.EnvVarConfigProvider MYSQL_CONNECTION_URL=jdbc:mysql://foo:datahub@test_mysql:3306/librarydb +MYSQL_PORT=3306 +MYSQL_DB=librarydb POSTGRES_CONNECTION_URL=jdbc:postgresql://test_postgres:5432/postgres?user=postgres&password=datahub S3_ENDPOINT_URL=http://s3mock:9090 \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py index 8cf76cfb26af7..26f3d50c1167b 100644 --- 
a/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py +++ b/metadata-ingestion/tests/integration/kafka-connect/test_kafka_connect.py @@ -88,9 +88,7 @@ def test_resources_dir(pytestconfig): def loaded_kafka_connect(kafka_connect_runner): # # Setup mongo cluster command = "docker exec test_mongo mongosh test_db -f /scripts/mongo-init.js" - ret = subprocess.run( - command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) + ret = subprocess.run(command, shell=True, capture_output=True) assert ret.returncode == 0 # Creating MySQL source with no transformations , only topic prefix @@ -167,7 +165,7 @@ def loaded_kafka_connect(kafka_connect_runner): "query": "select * from member", "topic.prefix": "query-topic", "tasks.max": "1", - "connection.url": "${env:MYSQL_CONNECTION_URL}" + "connection.url": "jdbc:mysql://foo:datahub@test_mysql:${env:MYSQL_PORT}/${env:MYSQL_DB}" } } """, @@ -298,9 +296,7 @@ def loaded_kafka_connect(kafka_connect_runner): assert r.status_code == 201 # Created command = "docker exec test_mongo mongosh test_db -f /scripts/mongo-populate.js" - ret = subprocess.run( - command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) + ret = subprocess.run(command, shell=True, capture_output=True) assert ret.returncode == 0 # Creating S3 Sink source @@ -419,7 +415,17 @@ def test_kafka_connect_ingest_stateful( "provider": "env", "path_key": "MYSQL_CONNECTION_URL", "value": "jdbc:mysql://test_mysql:3306/librarydb", - } + }, + { + "provider": "env", + "path_key": "MYSQL_PORT", + "value": "3306", + }, + { + "provider": "env", + "path_key": "MYSQL_DB", + "value": "librarydb", + }, ], "stateful_ingestion": { "enabled": True, diff --git a/metadata-ingestion/tests/integration/kafka/test_kafka_state.py b/metadata-ingestion/tests/integration/kafka/test_kafka_state.py index 6dfc0427f76c1..24e81fbf128b0 100644 --- a/metadata-ingestion/tests/integration/kafka/test_kafka_state.py +++ 
b/metadata-ingestion/tests/integration/kafka/test_kafka_state.py @@ -40,9 +40,9 @@ def create_kafka_topics(self, topics: List[NewTopic]) -> None: for topic, f in fs.items(): try: f.result() # The result itself is None - print("Topic {} created".format(topic)) + print(f"Topic {topic} created") except Exception as e: - print("Failed to create topic {}: {}".format(topic, e)) + print(f"Failed to create topic {topic}: {e}") raise e def delete_kafka_topics(self, topics: List[str]) -> None: @@ -60,11 +60,11 @@ def delete_kafka_topics(self, topics: List[str]) -> None: for topic, f in fs.items(): try: f.result() # The result itself is None - print("Topic {} deleted".format(topic)) + print(f"Topic {topic} deleted") except Exception as e: # this error should be ignored when we already deleted # the topic within the test code - print("Failed to delete topic {}: {}".format(topic, e)) + print(f"Failed to delete topic {topic}: {e}") def __enter__(self): topics = [ diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index 8e167328e0ba2..c2314e65bd367 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -9,6 +9,7 @@ from looker_sdk.rtl import transport from looker_sdk.rtl.transport import TransportOptions from looker_sdk.sdk.api40.models import ( + Category, Dashboard, DashboardElement, FolderBase, @@ -24,7 +25,8 @@ ) from datahub.ingestion.run.pipeline import Pipeline, PipelineInitError -from datahub.ingestion.source.looker import looker_usage +from datahub.ingestion.source.looker import looker_common, looker_usage +from datahub.ingestion.source.looker.looker_common import LookerExplore from datahub.ingestion.source.looker.looker_lib_wrapper import ( LookerAPI, LookerAPIConfig, @@ -993,3 +995,66 @@ def test_independent_soft_deleted_looks( assert len(looks) == 2 assert looks[0].title == "Outer Look" assert 
looks[1].title == "Soft Deleted" + + +@freeze_time(FROZEN_TIME) +def test_upstream_cll(pytestconfig, tmp_path, mock_time, mock_datahub_graph): + mocked_client = mock.MagicMock() + + with mock.patch( + "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph", + mock_datahub_graph, + ) as mock_checkpoint, mock.patch("looker_sdk.init40") as mock_sdk: + mock_checkpoint.return_value = mock_datahub_graph + + mock_sdk.return_value = mocked_client + setup_mock_explore( + mocked_client, + additional_lkml_fields=[ + LookmlModelExploreField( + name="dim2", + type="string", + dimension_group=None, + description="dimension one description", + label_short="Dimensions One Label", + view="underlying_view", + source_file="views/underlying_view.view.lkml", + ), + LookmlModelExploreField( + category=Category.dimension, + dimension_group="my_explore_name.createdon", + field_group_label="Createdon Date", + field_group_variant="Date", + label="Dataset Lineages Explore Createdon Date", + label_short="Createdon Date", + lookml_link="/projects/datahub-demo/files/views%2Fdatahub-demo%2Fdatasets%2Fdataset_lineages.view.lkml?line=5", + name="my_explore_name.createdon_date", + project_name="datahub-demo", + source_file="views/datahub-demo/datasets/dataset_lineages.view.lkml", + source_file_path="datahub-demo/views/datahub-demo/datasets/dataset_lineages.view.lkml", + sql='${TABLE}."CREATEDON" ', + suggest_dimension="my_explore_name.createdon_date", + suggest_explore="my_explore_name", + type="date_date", + view="my_explore_name", + view_label="Dataset Lineages Explore", + original_view="dataset_lineages", + ), + ], + ) + + looker_explore: Optional[LookerExplore] = looker_common.LookerExplore.from_api( + model="fake", + explore_name="my_explore_name", + client=mocked_client, + reporter=mock.MagicMock(), + source_config=mock.MagicMock(), + ) + + assert looker_explore is not None + assert looker_explore.name == "my_explore_name" + assert looker_explore.fields 
is not None + assert len(looker_explore.fields) == 3 + assert ( + looker_explore.fields[2].upstream_fields[0] == "dataset_lineages.createdon" + ) diff --git a/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json b/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json new file mode 100644 index 0000000000000..b06b59ba43654 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/duplicate_field_ingestion_golden.json @@ -0,0 +1,488 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "project_name": "lkml_samples" + }, + "name": "lkml_samples" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "LookML Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": 
"no-run-id-provided" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "# File was added to check duplicate field issue\n\nview: dataset_lineages {\n sql_table_name: \"PUBLIC\".\"DATASET_LINEAGES\"\n ;;\n\n dimension: createdon {\n type: date\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension_group: createdon {\n type: time\n timeframes: [\n raw,\n time,\n date,\n week,\n month,\n quarter,\n year\n ]\n sql: ${TABLE}.\"CREATEDON\" ;;\n }\n\n dimension: entity {\n type: string\n sql: ${TABLE}.\"ENTITY\" ;;\n }\n\n dimension: metadata {\n type: string\n sql: ${TABLE}.\"METADATA\" ;;\n }\n\n dimension: urn {\n type: string\n sql: ${TABLE}.\"URN\" ;;\n }\n\n dimension: version {\n type: number\n sql: ${TABLE}.\"VERSION\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),entity)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),entity)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),metadata)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),metadata)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),urn)" + ], + "downstreamType": "FIELD", + "downstreams": [ 
+ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),urn)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),version)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),version)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),createdon)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),createdon)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.dataset_lineages,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD),count)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "dataset_lineages", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "entity", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false 
+ }, + { + "fieldPath": "metadata", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "urn", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "version", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "number", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "createdon", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "time", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + }, + { + "tag": "urn:li:tag:Temporal" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "count", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "count", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Measure" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "dataset_lineages.view.lkml", + "looker.model": "data" + }, + "name": "dataset_lineages", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.dataset_lineages,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Dimension", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Dimension" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Measure", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Measure" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Temporal", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Temporal" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/expected_output.json b/metadata-ingestion/tests/integration/lookml/expected_output.json index 68a328c64dea9..931830eecf0c3 100644 --- a/metadata-ingestion/tests/integration/lookml/expected_output.json +++ b/metadata-ingestion/tests/integration/lookml/expected_output.json @@ -142,30 +142,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -350,13 +326,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -370,12 +352,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -388,10 +370,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n country,\n city,\n timestamp,\n 
measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -404,18 +388,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -587,13 +563,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -607,12 +589,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -625,10 +607,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": 
false, + "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -641,18 +625,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -713,13 +689,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -733,12 +715,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -751,10 +733,12 @@ 
"entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -767,18 +751,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -839,13 +815,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -859,12 +841,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": 
"UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -877,10 +859,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -893,18 +877,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -941,6 +917,19 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.events,PROD)", "type": "VIEW" } + ], + 
"fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.events,PROD),additional_measure)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD),additional_measure)" + ], + "confidenceScore": 1.0 + } ] } }, @@ -1010,13 +999,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1030,12 +1025,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1048,10 +1043,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "container", + 
"aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1064,18 +1061,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1136,13 +1125,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1156,12 +1151,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": 
false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1174,10 +1169,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1190,18 +1187,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1262,13 +1251,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1282,12 +1277,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1300,10 +1295,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1316,21 +1313,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "nested" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ 
-1466,13 +1452,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "nested" + } ] } }, @@ -1486,12 +1481,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1504,10 +1499,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1520,18 +1517,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - 
"id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1592,13 +1581,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1612,12 +1607,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1630,10 +1625,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1646,18 +1643,10 @@ "entityType": "dataset", 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1706,6 +1695,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),pk)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.ecommerce.ability,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),count)" + ], + "confidenceScore": 1.0 } ] } @@ -1797,13 +1797,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1817,12 +1823,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] 
} }, "systemMetadata": { @@ -1835,10 +1841,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1851,18 +1859,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2015,13 +2015,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2035,12 +2041,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "explore_source: my_view_explore {\n 
bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2053,10 +2059,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2069,18 +2077,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2140,6 +2140,28 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),city)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" + ], + "downstreamType": "FIELD", + 
"downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),unique_countries)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),derived_col)" + ], + "confidenceScore": 1.0 } ] } @@ -2273,13 +2295,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2293,12 +2321,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2311,10 +2339,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": 
"container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2327,18 +2357,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2457,6 +2479,30 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/data.model.lkml b/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/data.model.lkml new file mode 100644 index 0000000000000..ddad718721cfa --- /dev/null +++ 
b/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/data.model.lkml @@ -0,0 +1,7 @@ +connection: "my_connection" + +include: "dataset_lineages.view.lkml" + +explore: explore_dataset_lineage { + from: dataset_lineages +} diff --git a/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml b/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml new file mode 100644 index 0000000000000..6062993f320d3 --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/lkml_samples_duplicate_field/dataset_lineages.view.lkml @@ -0,0 +1,50 @@ +# File was added to check duplicate field issue + +view: dataset_lineages { + sql_table_name: "PUBLIC"."DATASET_LINEAGES" + ;; + + dimension: createdon { + type: date + sql: ${TABLE}."CREATEDON" ;; + } + + dimension_group: createdon { + type: time + timeframes: [ + raw, + time, + date, + week, + month, + quarter, + year + ] + sql: ${TABLE}."CREATEDON" ;; + } + + dimension: entity { + type: string + sql: ${TABLE}."ENTITY" ;; + } + + dimension: metadata { + type: string + sql: ${TABLE}."METADATA" ;; + } + + dimension: urn { + type: string + sql: ${TABLE}."URN" ;; + } + + dimension: version { + type: number + sql: ${TABLE}."VERSION" ;; + } + + measure: count { + type: count + drill_fields: [] + } +} diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json index 2c0503154588a..a846e2ca84b09 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json @@ -142,30 +142,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - 
"path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -350,13 +326,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -370,12 +352,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -388,10 +370,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -404,18 +388,10 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -587,13 +563,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -607,12 +589,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -625,10 +607,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -641,18 
+625,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -713,13 +689,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -733,12 +715,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -751,10 +733,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + 
"aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -767,18 +751,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -839,13 +815,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -859,12 +841,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: 
\"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -877,10 +859,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -893,18 +877,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -941,6 +917,19 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.looker_schema.events,PROD)", "type": "VIEW" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.looker_schema.events,PROD),additional_measure)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD),additional_measure)" + ], + "confidenceScore": 1.0 + } ] } }, @@ -1010,13 +999,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1030,12 +1025,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1048,10 +1043,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1064,18 +1061,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1136,13 +1125,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1156,12 +1151,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: 
looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1174,10 +1169,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1190,18 +1187,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1262,13 +1251,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - 
"aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1282,12 +1277,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1300,10 +1295,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1316,21 +1313,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "nested" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1466,13 +1452,22 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "nested" + } ] } }, @@ -1486,12 +1481,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1504,10 +1499,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1520,18 +1517,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - 
"urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1592,13 +1581,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1612,12 +1607,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1630,10 +1625,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1646,18 +1643,10 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1706,6 +1695,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),pk)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:bigquery,project-foo.ecommerce.ability,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),count)" + ], + "confidenceScore": 1.0 } ] } @@ -1797,13 +1797,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1817,12 +1823,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + 
] } }, "systemMetadata": { @@ -1835,10 +1841,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1851,18 +1859,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2015,13 +2015,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2035,12 +2041,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "explore_source: my_view_explore {\n 
bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2053,10 +2059,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2069,18 +2077,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2140,6 +2140,28 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),city)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" + ], + "downstreamType": "FIELD", + 
"downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),unique_countries)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),derived_col)" + ], + "confidenceScore": 1.0 } ] } @@ -2273,13 +2295,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2293,12 +2321,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2311,10 +2339,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": 
"container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2327,18 +2357,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2457,6 +2479,30 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json index f56718c6f5c85..47d536fb82409 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json +++ 
b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json @@ -142,30 +142,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -350,13 +326,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -370,12 +352,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -388,10 +370,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", 
"aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -404,18 +388,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -587,13 +563,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -607,12 +589,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -625,10 +607,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - 
"aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -641,18 +625,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -713,13 +689,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -733,12 +715,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: 
test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -751,10 +733,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -767,18 +751,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -839,13 +815,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, 
@@ -859,12 +841,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -877,10 +859,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -893,18 +877,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -941,6 +917,19 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:hive,looker_schema.events,PROD)", "type": "VIEW" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,looker_schema.events,PROD),additional_measure)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD),additional_measure)" + ], + "confidenceScore": 1.0 + } ] } }, @@ -1010,13 +999,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1030,12 +1025,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1048,10 +1043,12 @@ 
"entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1064,18 +1061,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1136,13 +1125,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1156,12 +1151,12 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1174,10 +1169,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1190,18 +1187,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ 
-1262,13 +1251,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1282,12 +1277,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1300,10 +1295,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1316,21 +1313,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "nested" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1466,13 +1452,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "nested" + } ] } }, @@ -1486,12 +1481,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1504,10 +1499,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1520,18 +1517,10 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1592,13 +1581,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1612,12 +1607,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1630,10 +1625,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n 
;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1646,18 +1643,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1706,6 +1695,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),pk)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hive,ecommerce.ability,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),count)" + ], + "confidenceScore": 1.0 } ] } @@ -1797,13 +1797,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1817,12 +1823,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - 
"materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1835,10 +1841,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1851,18 +1859,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2015,13 +2015,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2035,12 +2041,12 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2053,10 +2059,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2069,18 +2077,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2140,6 +2140,28 @@ 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),city)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),unique_countries)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),derived_col)" + ], + "confidenceScore": 1.0 } ] } @@ -2273,13 +2295,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2293,12 +2321,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights 
{\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2311,10 +2339,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2327,18 +2357,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2457,6 +2479,30 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { 
"proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json index 2aa4a3a41ba35..7e323170e58da 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json @@ -142,30 +142,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -336,13 +312,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -356,12 +338,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n country,\n city,\n 
timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -374,10 +356,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -390,18 +374,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -559,13 +535,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -579,12 +561,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { 
"json": { - "materialized": false, - "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -597,10 +579,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -613,18 +597,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -685,13 +661,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -705,12 +687,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + 
"aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -723,10 +705,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -739,18 +723,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -811,13 +787,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -831,12 +813,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -849,10 +831,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -865,18 +849,10 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -913,6 +889,19 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD)", "type": "VIEW" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD),additional_measure)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD),additional_measure)" + ], + "confidenceScore": 1.0 + } ] } }, @@ -982,13 +971,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1002,12 +997,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: 
\"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1020,10 +1015,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1036,18 +1033,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1108,13 +1097,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1128,12 +1123,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1146,10 +1141,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1162,18 +1159,10 @@ "entityType": "dataset", 
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1234,13 +1223,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1254,12 +1249,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1272,10 +1267,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS 
aliased_platform, \n country VARCHAR(20) encode ZSTD", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1288,21 +1285,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "nested" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1349,13 +1335,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "nested" + } ] } }, @@ -1369,12 +1364,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1387,10 +1382,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "container", + 
"aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1403,18 +1400,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1475,13 +1464,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1495,12 +1490,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, 
"systemMetadata": { @@ -1513,10 +1508,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1529,18 +1526,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1589,6 +1578,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),pk)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.ecommerce.ability,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),count)" + ], + "confidenceScore": 1.0 } ] } @@ -1680,13 +1680,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - 
"typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1700,12 +1706,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1718,10 +1724,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1734,18 +1742,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1898,13 +1898,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1918,12 +1924,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1936,10 +1942,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1952,18 +1960,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - 
"aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2023,6 +2023,28 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),city)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),unique_countries)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),derived_col)" + ], + "confidenceScore": 1.0 } ] } @@ -2156,13 +2178,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2176,12 +2204,12 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2194,10 +2222,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2210,18 +2240,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2340,6 +2362,30 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json index 3798c9b71e3b8..caefb7b9bcce2 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json @@ -142,30 +142,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -350,13 +326,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -370,12 +352,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -388,10 +370,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -404,18 +388,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -587,13 +563,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - 
"typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -607,12 +589,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -625,10 +607,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -641,18 +625,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -713,13 +689,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + 
"aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -733,12 +715,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -751,10 +733,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -767,18 +751,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { 
- "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -839,13 +815,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -859,12 +841,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -877,10 +859,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: 
looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -893,18 +877,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -941,6 +917,19 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD)", "type": "VIEW" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD),additional_measure)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD),additional_measure)" + ], + "confidenceScore": 1.0 + } ] } }, @@ -1010,13 +999,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1030,12 +1025,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1048,10 +1043,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1064,18 +1061,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - 
"urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1136,13 +1125,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1156,12 +1151,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1174,10 +1169,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: 
extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1190,18 +1187,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1262,13 +1251,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1282,12 +1277,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1300,10 +1295,12 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1316,21 +1313,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "nested" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1466,13 +1452,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "nested" + } ] } }, @@ -1486,12 +1481,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n 
WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1504,10 +1499,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1520,18 +1517,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1592,13 +1581,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1612,12 +1607,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": 
"viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1630,10 +1625,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1646,18 +1643,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1706,6 +1695,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),pk)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.ecommerce.ability,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),count)" + ], + 
"confidenceScore": 1.0 } ] } @@ -1797,13 +1797,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1817,12 +1823,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1835,10 +1841,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1851,18 +1859,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - 
"urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2015,13 +2015,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2035,12 +2041,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2053,10 +2059,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: 
my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2069,18 +2077,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2140,6 +2140,28 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),city)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),unique_countries)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),derived_col)" + ], + "confidenceScore": 1.0 } ] } @@ -2273,13 +2295,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { 
"json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2293,12 +2321,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2311,10 +2339,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2327,18 +2357,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2457,6 +2479,30 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json index 414a800e74a5a..32386098b7be8 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_deny_pattern.json @@ -142,30 +142,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -218,13 +194,19 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -238,12 +220,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -256,10 +238,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -272,18 
+256,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -344,13 +320,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -364,12 +346,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -382,10 +364,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - 
"aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -398,18 +382,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -446,6 +422,19 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD)", "type": "VIEW" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD),additional_measure)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD),additional_measure)" + ], + "confidenceScore": 1.0 + } ] } }, @@ -515,13 +504,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": 
"UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -535,12 +530,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -553,10 +548,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -569,18 +566,10 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -641,13 +630,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -661,12 +656,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -679,10 +674,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", 
- "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -695,18 +692,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -765,6 +754,30 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json 
index 54861ed04868b..c0cec6c261010 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json @@ -142,30 +142,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -350,13 +326,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -370,12 +352,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -388,10 +370,12 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -404,18 +388,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -587,13 +563,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -607,12 +589,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ 
-625,10 +607,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -641,18 +625,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -713,13 +689,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -733,12 +715,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: 
extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -751,10 +733,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -767,18 +751,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -839,13 +815,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + 
"path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -859,12 +841,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -877,10 +859,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -893,18 +877,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -941,6 +917,19 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.looker_schema.events,DEV)", "type": "VIEW" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.looker_schema.events,DEV),additional_measure)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD),additional_measure)" + ], + "confidenceScore": 1.0 + } ] } }, @@ -1010,13 +999,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1030,12 +1025,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name 
{}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1048,10 +1043,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1064,18 +1061,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1136,13 +1125,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1156,12 +1151,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1174,10 +1169,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1190,18 +1187,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1262,13 +1251,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1282,12 +1277,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1300,10 +1295,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1316,21 +1313,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": 
"UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "nested" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1466,13 +1452,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "nested" + } ] } }, @@ -1486,12 +1481,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1504,10 +1499,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} 
order.region {% endcondition %}\n GROUP BY 1", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1520,18 +1517,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1592,13 +1581,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1612,12 +1607,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1630,10 +1625,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - 
"container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1646,18 +1643,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1706,6 +1695,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),pk)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.ecommerce.ability,DEV),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),count)" + ], + "confidenceScore": 1.0 } ] } @@ -1797,13 +1797,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1817,12 +1823,12 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1835,10 +1841,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1851,18 +1859,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2015,13 +2015,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2035,12 +2041,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2053,10 +2059,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2069,18 +2077,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2140,6 +2140,28 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),city)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),unique_countries)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),derived_col)" + ], + "confidenceScore": 1.0 } ] } @@ -2273,13 +2295,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2293,12 +2321,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: 
yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2311,10 +2339,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2327,18 +2357,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2457,6 +2479,30 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + 
"lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json index 617aeceb5d68f..31aec97293e5a 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json @@ -142,30 +142,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -351,13 +327,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -371,12 +353,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + 
"aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -389,10 +371,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -405,18 +389,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -589,13 +565,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -609,12 +591,12 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -627,10 +609,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -643,18 +627,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -716,13 +692,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -736,12 
+718,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -754,10 +736,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -770,18 +754,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { 
@@ -843,13 +819,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -863,12 +845,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -881,10 +863,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n 
extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -897,18 +881,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -945,6 +921,19 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD)", "type": "VIEW" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.looker_schema.events,PROD),additional_measure)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD),additional_measure)" + ], + "confidenceScore": 1.0 + } ] } }, @@ -1015,13 +1004,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1035,12 +1030,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": 
"viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1053,10 +1048,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1069,18 +1066,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1142,13 +1131,19 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1162,12 +1157,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1180,10 +1175,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", 
+ "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1196,18 +1193,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1269,13 +1258,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1289,12 +1284,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1307,10 +1302,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + 
"materialized": false, + "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1323,21 +1320,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "nested" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1474,13 +1460,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "nested" + } ] } }, @@ -1494,12 +1489,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1512,10 +1507,12 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1528,18 +1525,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1601,13 +1590,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1621,12 +1616,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" 
;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1639,10 +1634,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1655,18 +1652,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1715,6 +1704,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),pk)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.ecommerce.ability,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD),count)" + ], + "confidenceScore": 1.0 } ] } @@ -1807,13 +1807,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -1827,12 +1833,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1845,10 +1851,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1861,18 +1869,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2026,13 +2026,19 @@ }, { 
"entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2046,12 +2052,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2064,10 +2070,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2080,18 +2088,10 @@ "entityType": 
"dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2151,6 +2151,28 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),city)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),unique_countries)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.is_latest)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD),derived_col)" + ], + "confidenceScore": 1.0 } ] } @@ -2285,13 +2307,19 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e" + } ] } }, @@ -2305,12 +2333,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2323,10 +2351,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2339,18 +2369,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2470,6 +2492,30 @@ 
"lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.flights,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { diff --git a/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json b/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json index 61b45bc961307..70f48953a06ad 100644 --- a/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json +++ b/metadata-ingestion/tests/integration/lookml/refinement_include_order_golden.json @@ -142,33 +142,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d", - "urn": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" - }, - { - "id": "views" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -242,6 +215,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD),issue_date_3)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.book,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD),count)" + ], + "confidenceScore": 1.0 } ] } @@ -396,13 +380,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.book,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d", + "urn": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" + }, + { + "id": "views" + } ] } }, @@ -416,12 +409,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"book.view\"\n\nview: +book {\n dimension: issue_date_3 {\n type: number\n sql: ${TABLE}.\"date\" ;;\n }\n}\n\n\nview: extend_book {\n extends: [book]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -434,10 +427,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" + "materialized": false, + "viewLogic": "include: \"book.view\"\n\nview: +book {\n dimension: issue_date_3 {\n type: number\n sql: ${TABLE}.\"date\" ;;\n }\n}\n\n\nview: extend_book {\n 
extends: [book]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -450,21 +445,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d", - "urn": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" - }, - { - "id": "views" - } - ] + "container": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" } }, "systemMetadata": { @@ -525,13 +509,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.order,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.extend_book,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d", + "urn": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" + }, + { + "id": "views" + } ] } }, @@ -545,12 +538,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.order,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: order {\n sql_table_name: public.order ;;\n\n dimension: order_id {\n type: number\n sql: ${TABLE}.\"order_id\" ;;\n }\n\n dimension: book_id {\n type: number\n sql: ${TABLE}.\"book_id\" ;;\n }\n\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -563,10 +556,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.order,PROD)", 
"changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" + "materialized": false, + "viewLogic": "view: order {\n sql_table_name: public.order ;;\n\n dimension: order_id {\n type: number\n sql: ${TABLE}.\"order_id\" ;;\n }\n\n dimension: book_id {\n type: number\n sql: ${TABLE}.\"book_id\" ;;\n }\n\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -579,21 +574,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.order,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d", - "urn": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" - }, - { - "id": "views" - } - ] + "container": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" } }, "systemMetadata": { @@ -744,13 +728,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_1.view.order,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d", + "urn": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" + }, + { + "id": "views" + } ] } }, @@ -764,12 +757,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"book.view\"\n\nview: 
issue_history {\n sql_table_name: public.issue_history ;;\n\n dimension: book_name {\n type: string\n sql: ${TABLE}.\"book_name\" ;;\n }\n\n dimension: user_name {\n type: string\n sql: ${TABLE}.\"user_name\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: [book_name, user_name]\n }\n}\n\n\nview: +book {\n dimension: issue_date_2 {\n type: string\n sql: ${TABLE}.\"date\" ;;\n }\n}\n\nview: +book {\n dimension: issue_date_4 {\n type: string\n sql: ${TABLE}.\"date\" ;;\n }\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -782,10 +775,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" + "materialized": false, + "viewLogic": "include: \"book.view\"\n\nview: issue_history {\n sql_table_name: public.issue_history ;;\n\n dimension: book_name {\n type: string\n sql: ${TABLE}.\"book_name\" ;;\n }\n\n dimension: user_name {\n type: string\n sql: ${TABLE}.\"user_name\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: [book_name, user_name]\n }\n}\n\n\nview: +book {\n dimension: issue_date_2 {\n type: string\n sql: ${TABLE}.\"date\" ;;\n }\n}\n\nview: +book {\n dimension: issue_date_4 {\n type: string\n sql: ${TABLE}.\"date\" ;;\n }\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -798,21 +793,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d", - "urn": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" - }, - { - "id": "views" - } - 
] + "container": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" } }, "systemMetadata": { @@ -872,6 +856,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD),user_name)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.public.issue_history,PROD),count)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD),count)" + ], + "confidenceScore": 1.0 } ] } @@ -982,6 +977,33 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_refinement_sample1.model_2.view.issue_history,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d", + "urn": "urn:li:container:6f4a333fac5ec55b27b0e65bfb57ef0d" + }, + { + "id": "views" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json b/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json index a98bcd6f89dbd..16e901125e451 100644 --- a/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json +++ b/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json @@ -142,33 +142,6 @@ "lastRunId": "no-run-id-provided" } }, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.foo.view.my_view,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": 
[ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "foo" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { @@ -353,13 +326,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.bar.view.my_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.foo.view.my_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "foo" + } ] } }, @@ -373,12 +355,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.bar.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -391,10 +373,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.bar.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n country,\n city,\n timestamp,\n measurement\n FROM\n ${my_view.SQL_TABLE_NAME} AS my_view", + "viewLanguage": "sql" } }, "systemMetadata": { @@ 
-407,21 +391,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.bar.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "bar" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -593,13 +566,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.included_view_file.view.include_able_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.bar.view.my_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "bar" + } ] } }, @@ -613,12 +595,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.included_view_file.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -631,10 +613,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.included_view_file.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": 
false, + "viewLogic": "view: include_able_view {\n sql_table_name: looker_schema.include_able ;;\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -647,21 +631,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.included_view_file.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "included_view_file" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -722,13 +695,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.included_view_file.view.include_able_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "included_view_file" + } ] } }, @@ -742,12 +724,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n 
extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -760,10 +742,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -776,21 +760,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "view_declarations" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -851,13 +824,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.extending_looker_events,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": 
"urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "view_declarations" + } ] } }, @@ -871,12 +853,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -889,10 +871,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -905,21 +889,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": 
[ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "view_declarations" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -956,6 +929,19 @@ "dataset": "urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.events,PROD)", "type": "VIEW" } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.looker_schema.events,PROD),additional_measure)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.extending_looker_events,PROD),additional_measure)" + ], + "confidenceScore": 1.0 + } ] } }, @@ -1025,13 +1011,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.autodetect_sql_name_based_on_view_name,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.extending_looker_events,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "view_declarations" + } ] } }, @@ -1045,12 +1040,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: 
extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1063,10 +1058,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1079,21 +1076,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "view_declarations" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1154,13 +1140,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.test_include_external_view,PROD)", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.autodetect_sql_name_based_on_view_name,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "view_declarations" + } ] } }, @@ -1174,12 +1169,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1192,10 +1187,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "include: \"/included_view_file.view\"\n\nview: looker_events {\n sql_table_name: looker_schema.events ;;\n}\n\nview: extending_looker_events {\n extends: [looker_events]\n\n measure: additional_measure {\n type: count\n }\n}\n\nview: autodetect_sql_name_based_on_view_name {}\n\nview: test_include_external_view {\n extends: [include_able_view]\n}\n", + "viewLanguage": 
"lookml" } }, "systemMetadata": { @@ -1208,21 +1205,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "view_declarations" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1283,13 +1269,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.nested.fragment_derived.view.fragment_derived_view,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view_declarations.view.test_include_external_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "view_declarations" + } ] } }, @@ -1303,12 +1298,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.nested.fragment_derived.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1321,10 +1316,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.nested.fragment_derived.view.fragment_derived_view,PROD)", "changeType": 
"UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "date DATE encode ZSTD, \n platform VARCHAR(20) encode ZSTD AS aliased_platform, \n country VARCHAR(20) encode ZSTD", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1337,24 +1334,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.nested.fragment_derived.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "nested" - }, - { - "id": "fragment_derived" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1490,13 +1473,25 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.liquid.view.customer_facts,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.nested.fragment_derived.view.fragment_derived_view,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "nested" + }, + { + "id": "fragment_derived" + } ] } }, @@ -1510,12 +1505,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.liquid.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS 
lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", - "viewLanguage": "sql" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1528,10 +1523,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.liquid.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1544,21 +1541,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.liquid.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "liquid" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1619,13 +1605,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.liquid.view.customer_facts,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "liquid" + } ] } }, @@ -1639,12 +1634,12 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1657,10 +1652,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: ability {\n sql_table_name: \"ECOMMERCE\".\"ABILITY\"\n ;;\n\n dimension: pk {\n type: number\n sql: ${TABLE}.\"PK\" ;;\n }\n\n measure: count {\n type: count\n drill_fields: []\n }\n}\n", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1673,21 +1670,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "ability" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -1736,6 +1722,17 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD),pk)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:conn,.ecommerce.ability,PROD),count)" + ], + 
"downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD),count)" + ], + "confidenceScore": 1.0 } ] } @@ -1827,13 +1824,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.ability.view.ability,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "ability" + } ] } }, @@ -1847,12 +1853,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -1865,10 +1871,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: owners {\n dimension: id {\n primary_key: yes\n sql: ${TABLE}.id ;;\n }\n dimension: owner_name {\n sql: ${TABLE}.owner_name ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -1881,21 +1889,10 @@ "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "owners" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2080,13 +2077,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.owners.view.owners,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "owners" + } ] } }, @@ -2100,12 +2106,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2118,10 +2124,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD)", 
"changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "explore_source: my_view_explore {\n bind_all_filters: yes\n\n column: country {\n field: my_view_explore.country\n }\n\n column: city {\n field: my_view_explore.city\n }\n\n column: is_latest {\n field: my_view_explore.is_latest\n }\n\n derived_column: derived_col {\n sql: coalesce(country, 'US') ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2134,21 +2142,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "native_derived_table" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2208,6 +2205,28 @@ "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD),city)" ], "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.country)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD),unique_countries)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view_explore,PROD),my_view_explore.is_latest)" + ], + "downstreamType": "FIELD", + 
"downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD),derived_col)" + ], + "confidenceScore": 1.0 } ] } @@ -2341,13 +2360,22 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.flights.view.flights,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.native_derived_table.view.view_derived_explore,PROD)", "changeType": "UPSERT", - "aspectName": "subTypes", + "aspectName": "browsePathsV2", "aspect": { "json": { - "typeNames": [ - "View" + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "native_derived_table" + } ] } }, @@ -2361,12 +2389,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.flights.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "viewProperties", + "aspectName": "subTypes", "aspect": { "json": { - "materialized": false, - "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", - "viewLanguage": "lookml" + "typeNames": [ + "View" + ] } }, "systemMetadata": { @@ -2379,10 +2407,12 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.flights.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "container", + "aspectName": "viewProperties", "aspect": { "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + "materialized": false, + "viewLogic": "view: flights {\n sql_table_name: flightstats.accidents ;;\n\n dimension: id {\n label: \"id\"\n primary_key: yes\n 
type: number\n sql: ${TABLE}.id ;;\n }\n}\n\n# override type of id parameter\nview: +flights {\n dimension: id {\n label: \"id\"\n primary_key: yes\n type: string\n sql: ${TABLE}.id ;;\n }\n}", + "viewLanguage": "lookml" } }, "systemMetadata": { @@ -2395,21 +2425,10 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.flights.view.flights,PROD)", "changeType": "UPSERT", - "aspectName": "browsePathsV2", + "aspectName": "container", "aspect": { "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - }, - { - "id": "flights" - } - ] + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" } }, "systemMetadata": { @@ -2528,6 +2547,33 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.flights.view.flights,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + }, + { + "id": "flights" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", diff --git a/metadata-ingestion/tests/integration/lookml/test_lookml.py b/metadata-ingestion/tests/integration/lookml/test_lookml.py index b0d973a060390..1c1f0fec3eebb 100644 --- a/metadata-ingestion/tests/integration/lookml/test_lookml.py +++ b/metadata-ingestion/tests/integration/lookml/test_lookml.py @@ -158,7 +158,7 @@ def test_lookml_explore_refinement(pytestconfig, tmp_path, mock_time): {"name": "+book", "extends__all": [["order"]]}, {"name": "+book", "extends__all": [["transaction"]]}, ], - connection=str(), + 
connection="", resolved_includes=[], includes=[], ) @@ -864,3 +864,26 @@ def test_manifest_parser(pytestconfig: pytest.Config) -> None: manifest = load_lkml(manifest_file) assert manifest + + +@freeze_time(FROZEN_TIME) +def test_duplicate_field_ingest(pytestconfig, tmp_path, mock_time): + test_resources_dir = pytestconfig.rootpath / "tests/integration/lookml" + mce_out_file = "duplicate_ingest_mces_output.json" + + new_recipe = get_default_recipe( + f"{tmp_path}/{mce_out_file}", + f"{test_resources_dir}/lkml_samples_duplicate_field", + ) + + pipeline = Pipeline.create(new_recipe) + pipeline.run() + pipeline.pretty_print_summary() + pipeline.raise_from_status(raise_warnings=True) + + golden_path = test_resources_dir / "duplicate_field_ingestion_golden.json" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / mce_out_file, + golden_path=golden_path, + ) diff --git a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json index 10c1c312a4d1c..9bab81eaa228b 100644 --- a/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json +++ b/metadata-ingestion/tests/integration/metabase/metabase_mces_golden.json @@ -182,7 +182,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { - "urn": "urn:li:dashboard:(metabase,1)", + "urn": "urn:li:dashboard:(metabase,10)", "aspects": [ { "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { @@ -234,7 +234,59 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { - "urn": "urn:li:dashboard:(metabase,1)", + "urn": "urn:li:dashboard:(metabase,20)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { + "customProperties": {}, + "title": "Dashboard 2", + "description": "", + "charts": [ + "urn:li:chart:(metabase,1)", + "urn:li:chart:(metabase,2)", + "urn:li:chart:(metabase,3)" + ], + "datasets": [], + 
"lastModified": { + "created": { + "time": 1705398694904, + "actor": "urn:li:corpuser:admin@metabase.com" + }, + "lastModified": { + "time": 1705398694904, + "actor": "urn:li:corpuser:admin@metabase.com" + } + }, + "dashboardUrl": "http://localhost:3000/dashboard/20" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:admin@metabase.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1636614000000, + "runId": "metabase-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { + "urn": "urn:li:dashboard:(metabase,10)", "aspects": [ { "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { @@ -333,7 +385,23 @@ }, { "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(metabase,1)", + "entityUrn": "urn:li:dashboard:(metabase,10)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1636614000000, + "runId": "metabase-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(metabase,20)", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards.json b/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards.json index b602d2dfb7dcd..16ca1d4d6c48f 100644 --- a/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards.json +++ b/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards.json @@ -1 +1 @@ -{"total": 1, "data": [{"description": null, "collection_position": null, "database_id": null, "name": "This is a test", "id": 10, "entity_id": "Q4gEaOmoBkfQX3_gXiH9g", "last-edit-info": {"id": 14, "last_name": "Doe", 
"first_name": "John", "email": "john.doe@somewhere.com", "timestamp": "2024-01-12T14:55:38.43304Z"}, "model": "dashboard"}], "models": ["dashboard"], "limit": null, "offset": null} +{"total":2,"data":[{"description":null,"collection_position":null,"database_id":null,"name":"This is a test","id":10,"entity_id":"Q4gEaOmoBkfQX3_gXiH9g","last-edit-info":{"id":14,"last_name":"Doe","first_name":"John","email":"john.doe@somewhere.com","timestamp":"2024-01-12T14:55:38.43304Z"},"model":"dashboard"},{"description":null,"collection_position":null,"database_id":null,"name":"This is a test","id":20,"entity_id":"R5jSaUsuDkqFK9_gTiH2x","last-edit-info":{"id":14,"last_name":"Doe","first_name":"John","email":"john.doe@somewhere.com","timestamp":"2024-01-12T14:55:38.43304Z"},"model":"dashboard"}],"models":["dashboard"],"limit":null,"offset":null} diff --git a/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards_deleted_item.json b/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards_deleted_item.json new file mode 100644 index 0000000000000..b602d2dfb7dcd --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/collection_dashboards_deleted_item.json @@ -0,0 +1 @@ +{"total": 1, "data": [{"description": null, "collection_position": null, "database_id": null, "name": "This is a test", "id": 10, "entity_id": "Q4gEaOmoBkfQX3_gXiH9g", "last-edit-info": {"id": 14, "last_name": "Doe", "first_name": "John", "email": "john.doe@somewhere.com", "timestamp": "2024-01-12T14:55:38.43304Z"}, "model": "dashboard"}], "models": ["dashboard"], "limit": null, "offset": null} diff --git a/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json b/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json index e968093c43850..f58a1079c3620 100644 --- a/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json +++ b/metadata-ingestion/tests/integration/metabase/setup/dashboard_1.json @@ -588,7 +588,7 @@ "entity_id": 
"lXypX5aa14HjkN_Im82C2", "visualization_settings": {}, "size_y": 6, - "dashboard_id": 1, + "dashboard_id": 10, "created_at": "2024-01-16T09:50:34.394488Z", "row": 0 }, @@ -802,7 +802,7 @@ "entity_id": "iVOtiEPgX-a90Qh3rJWui", "visualization_settings": {}, "size_y": 6, - "dashboard_id": 1, + "dashboard_id": 20, "created_at": "2024-01-16T09:51:34.833525Z", "row": 6 } @@ -821,7 +821,7 @@ "made_public_by_id": null, "embedding_params": null, "cache_ttl": null, - "id": 1, + "id": 10, "position": null, "entity_id": "Z6B2yiCTEMiwZFe4x5jPT", "param_fields": null, diff --git a/metadata-ingestion/tests/integration/metabase/setup/dashboard_2.json b/metadata-ingestion/tests/integration/metabase/setup/dashboard_2.json new file mode 100644 index 0000000000000..2f9beaccc1e18 --- /dev/null +++ b/metadata-ingestion/tests/integration/metabase/setup/dashboard_2.json @@ -0,0 +1,855 @@ +{ + "description": null, + "archived": false, + "collection_position": null, + "dashcards": [ + { + "size_x": 12, + "dashboard_tab_id": null, + "series": [], + "action_id": null, + "collection_authority_level": null, + "card": { + "description": null, + "archived": false, + "collection_position": null, + "table_id": null, + "result_metadata": [ + { + "display_name": "EVENT_DATE", + "field_ref": [ + "field", + "EVENT_DATE", + { + "base-type": "type/Date" + } + ], + "name": "EVENT_DATE", + "base_type": "type/Date", + "effective_type": "type/Date", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/DateTime": { + "earliest": "2023-12-04T00:00:00Z", + "latest": "2024-01-15T00:00:00Z" + } + } + } + }, + { + "display_name": "AND_VIEWERS", + "field_ref": [ + "field", + "AND_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "AND_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + 
"min": 4720, + "q1": 5083.5, + "q3": 9003, + "max": 10560, + "sd": 2090.2420089751945, + "avg": 6688.214285714285 + } + } + } + }, + { + "display_name": "AND_REDACTED", + "field_ref": [ + "field", + "AND_REDACTED", + { + "base-type": "type/Number" + } + ], + "name": "AND_REDACTED", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 948, + "q1": 2019.5, + "q3": 2500.5, + "max": 3180, + "sd": 460.56365857271413, + "avg": 2251.0714285714284 + } + } + } + }, + { + "display_name": "AND_REDACTED", + "field_ref": [ + "field", + "AND_REDACTED", + { + "base-type": "type/Number" + } + ], + "name": "AND_REDACTED", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 3545, + "q1": 10909, + "q3": 13916, + "max": 18861, + "sd": 3132.780684756446, + "avg": 12122.32142857143 + } + } + } + }, + { + "display_name": "IOS_VIEWERS", + "field_ref": [ + "field", + "IOS_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "IOS_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 6477, + "q1": 7481.5, + "q3": 10428.5, + "max": 13182, + "sd": 1948.047456520796, + "avg": 9075.17857142857 + } + } + } + }, + { + "display_name": "IOS_REDACTED", + "field_ref": [ + "field", + "IOS_REDACTED", + { + "base-type": "type/Number" + } + ], + "name": "IOS_REDACTED", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 1470, + "q1": 3020, + "q3": 3806, + "max": 4670, + "sd": 
665.7415088559197, + "avg": 3415.8571428571427 + } + } + } + }, + { + "display_name": "IOS_REDACTED", + "field_ref": [ + "field", + "IOS_REDACTED", + { + "base-type": "type/Number" + } + ], + "name": "IOS_REDACTED", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 4872, + "q1": 15019.5, + "q3": 20457, + "max": 27466, + "sd": 4688.492913816769, + "avg": 17683.89285714286 + } + } + } + }, + { + "display_name": "IOS_REDACTED/IOS_VIEWERS", + "field_ref": [ + "field", + "IOS_REDACTED/IOS_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "IOS_REDACTED/IOS_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 0.662587, + "q1": 1.8403745, + "q3": 2.241517, + "max": 2.576166, + "sd": 0.4488826998266724, + "avg": 1.974007857142857 + } + } + } + }, + { + "display_name": "AND_REDACTED/AND_VIEWERS", + "field_ref": [ + "field", + "AND_REDACTED/AND_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "AND_REDACTED/AND_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 0.671656, + "q1": 1.3536655, + "q3": 2.5325145, + "max": 3.097553, + "sd": 0.6816847359625038, + "avg": 1.93937275 + } + } + } + }, + { + "display_name": "IOS_REDACTED/IOS_VIEWERS", + "field_ref": [ + "field", + "IOS_REDACTED/IOS_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "IOS_REDACTED/IOS_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 
0.199918, + "q1": 0.34496099999999996, + "q3": 0.4352085, + "max": 0.47286, + "sd": 0.06928869477079941, + "avg": 0.3833206785714286 + } + } + } + }, + { + "display_name": "AND_REDACTED/AND_VIEWERS", + "field_ref": [ + "field", + "AND_REDACTED/AND_VIEWERS", + { + "base-type": "type/Number" + } + ], + "name": "AND_REDACTED/AND_VIEWERS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 28, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 0.179613, + "q1": 0.245343, + "q3": 0.475772, + "max": 0.522253, + "sd": 0.11732033433182058, + "avg": 0.3620892142857142 + } + } + } + } + ], + "can_write": true, + "database_id": 3, + "enable_embedding": false, + "collection_id": 112, + "query_type": "native", + "name": "REDACTED iOS vs. Android", + "query_average_duration": 50982, + "creator_id": 42, + "moderation_reviews": [], + "updated_at": "2024-01-16T13:34:29.916717Z", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "type": "native", + "native": { + "query": "-- 1. Table with redacted search users Android\n-- 2. Table with redacted search users iOS \n-- 3. Redacted from Android redacted\n-- 4. redacted from iOS\n-- 5. Compare the numbers iOS vs. Android\n\n\n-- 1. Table with redacted search users Android (to include date, platform, auth_account_id)\n-- 2. Table with redacted search users iOS (to include date, platform, auth_account_id)\n-- 3. Redacted from Android redacted (to include date, platform, count of redacted)\n-- 4. Redacted from iOS redacted (to include date, plaform, count of redacted)\n-- 5. Compare the numbers iOS vs. 
Android\n\nwith AND_viewers as \n(\nselect event_date, platform, auth_account_id \nfrom TEAMS_PRD.REDACTED.MRT_CURR__MPARTICLE_SCREEN_VIEWS\nwhere screen_name='redacted_search'\nand event_date>'2023-12-01'\nand platform='Android'\nand dayofweekiso(event_date) NOT IN (6,7)\ngroup by event_date, platform, auth_account_id\norder by event_date desc\n), \niOS_viewers as \n(\nselect event_date, platform, auth_account_id \nfrom TEAMS_PRD.REDACTED.MRT_CURR__MPARTICLE_SCREEN_VIEWS\nwhere screen_name='redacted_search'\nand event_date>'2023-12-01'\nand platform='iOS'\nand dayofweekiso(event_date) NOT IN (6,7)\ngroup by event_date, platform, auth_account_id\norder by event_date desc\n), \nAND_redacted as\n(\nselect redacted_ts::date as redacted_date, platform, count(distinct at.auth_account_id) as AND_redacted, count(group_redacted_id) as AND_redacted\nfrom TEAMS_PRD.REDACTED.MRT_CURR__REDACTED_CUSTOMER at\njoin AND_viewers av on av.event_date=at.redacted_ts::date and av.auth_account_id=at.auth_account_id\nwhere instrument_type='REDACTED'\ngroup by 1,2\norder by 1 desc\n), \niOS_redacted as\n(\nselect redacted_ts::date as redacted_date, platform, count(distinct it.auth_account_id) as iOS_redacted, count(group_redacted_id) as iOS_redacted\nfrom TEAMS_PRD.REDACTED.MRT_CURR__REDACTED_CUSTOMER it\njoin iOS_viewers iv on iv.event_date=it.redacted_ts::date and iv.auth_account_id=it.auth_account_id\nwhere instrument_type='REDACTED'\ngroup by 1,2\norder by 1 desc\n)\nselect a.event_date, count(distinct a.auth_account_id) as AND_viewers, AND_redacted, AND_redacted, count(distinct i.auth_account_id) as iOS_viewers, iOS_redacted, iOS_redacted, iOS_redacted/iOS_viewers, AND_redacted/AND_viewers, iOS_redacted/iOS_viewers, AND_redacted/AND_viewers\nfrom AND_VIEWERS a\njoin AND_redacted at\non a.event_date=at.redacted_date\njoin ios_viewers i\non a.event_date=i.event_date\njoin ios_redacted it\non i.event_date=it.redacted_date\ngroup by 1, 3, 4, 6, 7\norder by 1 desc\n\n\n", + 
"template-tags": {} + }, + "database": 3 + }, + "id": 1, + "parameter_mappings": [], + "display": "line", + "entity_id": "DhQgvvtTEarZH8yQBlqES", + "collection_preview": true, + "visualization_settings": { + "graph.dimensions": [ + "EVENT_DATE" + ], + "series_settings": { + "IOS_REDACTED/IOS_VIEWERS": { + "axis": "right" + }, + "AND_REDACTED/AND_VIEWERS": { + "axis": "right" + } + }, + "graph.metrics": [ + "IOS_REDACTED/IOS_VIEWERS", + "AND_REDACTED/AND_VIEWERS", + "AND_VIEWERS", + "IOS_VIEWERS" + ] + }, + "metabase_version": "v0.48.3 (80d8323)", + "parameters": [], + "dataset": false, + "created_at": "2024-01-16T09:44:49.407327Z", + "public_uuid": null + }, + "updated_at": "2024-01-16T09:45:45.410379Z", + "col": 0, + "id": 12, + "parameter_mappings": [], + "card_id": 1, + "entity_id": "tA9M9vJlTHG0KxQnvknKW", + "visualization_settings": {}, + "size_y": 6, + "dashboard_id": 20, + "created_at": "2024-01-16T09:45:45.410379Z", + "row": 0 + }, + { + "size_x": 12, + "dashboard_tab_id": null, + "series": [], + "action_id": null, + "collection_authority_level": null, + "card": { + "description": null, + "archived": false, + "collection_position": null, + "table_id": null, + "result_metadata": [ + { + "display_name": "CALENDAR_DATE", + "field_ref": [ + "field", + "CALENDAR_DATE", + { + "base-type": "type/Date" + } + ], + "name": "CALENDAR_DATE", + "base_type": "type/Date", + "effective_type": "type/Date", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 30, + "nil%": 0 + }, + "type": { + "type/DateTime": { + "earliest": "2023-12-17T00:00:00Z", + "latest": "2024-01-15T00:00:00Z" + } + } + } + }, + { + "display_name": "REDACTED", + "field_ref": [ + "field", + "REDACTED", + { + "base-type": "type/Number" + } + ], + "name": "REDACTED", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 27, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 682175, + 
"q1": 738644, + "q3": 805974, + "max": 847312, + "sd": 46783.99996291344, + "avg": 775505.5666666667 + } + } + } + }, + { + "display_name": "REDACTEDRS", + "field_ref": [ + "field", + "REDACTEDRS", + { + "base-type": "type/Number" + } + ], + "name": "REDACTEDRS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 27, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 46173, + "q1": 47556.94427191, + "q3": 48890, + "max": 50769, + "sd": 1164.9989906758983, + "avg": 48354.8 + } + } + } + }, + { + "display_name": "REDACTED/REDACTEDRS", + "field_ref": [ + "field", + "REDACTED/REDACTEDRS", + { + "base-type": "type/Number" + } + ], + "name": "REDACTED/REDACTEDRS", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 27, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 14.706168, + "q1": 15.398378, + "q3": 16.920933, + "max": 17.289964, + "sd": 0.8020030995826715, + "avg": 16.033017833333336 + } + } + } + } + ], + "can_write": true, + "database_id": 3, + "enable_embedding": false, + "collection_id": 112, + "query_type": "native", + "name": "Redacted redacted per redacted user", + "query_average_duration": 20433, + "creator_id": 1, + "moderation_reviews": [], + "updated_at": "2024-01-16T13:34:29.916788Z", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "type": "native", + "native": { + "query": "with dd as (\nselect distinct calendar_date as calendar_date from TEAMS_PRD.DATA_PLATFORM_MART.MRT__CALENDAR_DATES\nwhere calendar_date>'2022-01-01'\n), \nredacted as\n(\nselect dd.calendar_date, count(distinct auth_account_id) as redacted, max(redacted_ts), min(redacted_ts)\nfrom TEAMS_PRD.REDACTED.MRT_CURR__REDACTED_CUSTOMER t\njoin dd on redacted_ts::date BETWEEN dd.calendar_date-29 and dd.calendar_date\nwhere 
redacted_type='REGULAR'\nand instrument_type = 'REDACTED'\ngroup by dd.calendar_date\norder by dd.calendar_date desc\n),\nredacted as\n(\nselect dd.calendar_date, count(group_redacted_id) as redacted, max(redacted_ts), min(redacted_ts)\nfrom TEAMS_PRD.REDACTED.MRT_CURR__REDACTED_CUSTOMER t\njoin dd on redacted_ts::date BETWEEN dd.calendar_date-29 and dd.calendar_date\nwhere redacted_type='REGULAR'\nand instrument_type = 'REDACTED'\ngroup by dd.calendar_date\norder by dd.calendar_date desc\n)\nselect dd.calendar_date, redacted, redacted, redacted/redacted\nfrom dd\njoin redacted t on dd.calendar_date=t.calendar_date\njoin redacted tr on dd.calendar_date=tr.calendar_date\ngroup by dd.calendar_date, redacted, redacted, redacted/redacted\norder by dd.calendar_date desc \nlimit 30", + "template-tags": {} + }, + "database": 3 + }, + "id": 2, + "parameter_mappings": [], + "display": "line", + "entity_id": "b1jUcPcQM0XFMuviv4g3K", + "collection_preview": true, + "visualization_settings": { + "graph.dimensions": [ + "CALENDAR_DATE" + ], + "series_settings": { + "REDACTEDRS": { + "axis": "right" + } + }, + "graph.metrics": [ + "REDACTED/REDACTEDRS", + "REDACTEDRS" + ] + }, + "metabase_version": "v0.48.3 (80d8323)", + "parameters": [], + "dataset": false, + "created_at": "2024-01-16T09:50:09.487369Z", + "public_uuid": null + }, + "updated_at": "2024-01-16T09:50:34.394488Z", + "col": 12, + "id": 1, + "parameter_mappings": [], + "card_id": 2, + "entity_id": "lXypX5aa14HjkN_Im82C2", + "visualization_settings": {}, + "size_y": 6, + "dashboard_id": 20, + "created_at": "2024-01-16T09:50:34.394488Z", + "row": 0 + }, + { + "size_x": 12, + "dashboard_tab_id": null, + "series": [], + "action_id": null, + "collection_authority_level": null, + "card": { + "description": null, + "archived": false, + "collection_position": null, + "table_id": null, + "result_metadata": [ + { + "display_name": "EVENT_DATE", + "field_ref": [ + "field", + "EVENT_DATE", + { + "base-type": "type/Date" + } + ], 
+ "name": "EVENT_DATE", + "base_type": "type/Date", + "effective_type": "type/Date", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 11, + "nil%": 0 + }, + "type": { + "type/DateTime": { + "earliest": "2024-01-01T00:00:00Z", + "latest": "2024-01-15T00:00:00Z" + } + } + } + }, + { + "display_name": "KNOCKOUT", + "field_ref": [ + "field", + "KNOCKOUT", + { + "base-type": "type/Number" + } + ], + "name": "KNOCKOUT", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 11, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 175, + "q1": 853.75, + "q3": 1116.75, + "max": 1174, + "sd": 296.0767713709648, + "avg": 916.3636363636364 + } + } + } + }, + { + "display_name": "EXPIRY", + "field_ref": [ + "field", + "EXPIRY", + { + "base-type": "type/Number" + } + ], + "name": "EXPIRY", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 10, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 78, + "q1": 295.5, + "q3": 408.3925271309261, + "max": 431, + "sd": 105.10704500218294, + "avg": 336.90909090909093 + } + } + } + }, + { + "display_name": "PRODUCT", + "field_ref": [ + "field", + "PRODUCT", + { + "base-type": "type/Number" + } + ], + "name": "PRODUCT", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 9, + "nil%": 0 + }, + "type": { + "type/Number": { + "min": 57, + "q1": 163.75, + "q3": 233, + "max": 255, + "sd": 59.31119777763877, + "avg": 195.27272727272728 + } + } + } + }, + { + "display_name": "ISSUER", + "field_ref": [ + "field", + "ISSUER", + { + "base-type": "type/Number" + } + ], + "name": "ISSUER", + "base_type": "type/Number", + "effective_type": "type/Number", + "semantic_type": null, + "fingerprint": { + "global": { + "distinct-count": 10, + "nil%": 0 + 
}, + "type": { + "type/Number": { + "min": 43, + "q1": 214, + "q3": 292.25, + "max": 304, + "sd": 79.35879397910594, + "avg": 245.72727272727272 + } + } + } + } + ], + "can_write": true, + "database_id": 3, + "enable_embedding": false, + "collection_id": 112, + "query_type": "native", + "name": "Filter popularity", + "query_average_duration": 2830, + "creator_id": 1, + "moderation_reviews": [], + "updated_at": "2024-01-16T13:34:30.128815Z", + "made_public_by_id": null, + "embedding_params": null, + "cache_ttl": null, + "dataset_query": { + "type": "native", + "native": { + "query": "with issuer as\n(\n select event_date, count(*) as issuer_clicks, count(distinct auth_account_id) as issuer\n from TEAMS_PRD.REDACTED.MRT_CURR__MPARTICLE_EVENTS\n where event_name='redacted_search_filter_button_tapped' \n and event_attributes:filter_option::varchar='issuer'\n and event_date>'2023-12-31'\n and platform='Android'\n and dayofweekiso(event_date) NOT IN (6,7)\n and event_attributes:redacted_type::varchar='knock_out_product'\n group by 1\n order by 1 desc\n), expiry as\n(\n select event_date, count(*) as expiry_clicks, count(distinct auth_account_id) as expiry\n from TEAMS_PRD.REDACTED.MRT_CURR__MPARTICLE_EVENTS\n where event_name='redacted_search_filter_button_tapped' \n and event_attributes:filter_option::varchar='expiry'\n and event_date>'2023-12-31'\n and platform='Android'\n and dayofweekiso(event_date) NOT IN (6,7)\n and event_attributes:redacted_type::varchar='knock_out_product'\n group by 1\n order by 1 desc\n), product as\n(\n select event_date, count(*) as product_clicks, count(distinct auth_account_id) as product\n from TEAMS_PRD.REDACTED.MRT_CURR__MPARTICLE_EVENTS\n where event_name='redacted_search_filter_button_tapped' \n and event_attributes:filter_option::varchar='product'\n and event_date>'2023-12-31'\n and platform='Android'\n and dayofweekiso(event_date) NOT IN (6,7)\n and event_attributes:redacted_type::varchar='knock_out_product'\n group by 1\n order by 1 
desc\n), knockout as \n(\n select event_date, count(*) as knockout_clicks, count(distinct auth_account_id) as knockout\n from TEAMS_PRD.SCHEMA.MRT_CURR__MPARTICLE_EVENTS\n where event_name='redacted_search_filter_button_tapped' \n and event_attributes:filter_option::varchar='knockout'\n and event_date>'2023-12-31'\n and platform='Android'\n and dayofweekiso(event_date) NOT IN (6,7)\n and event_attributes:redacted_type::varchar='knock_out_product'\n group by 1\n order by 1 desc\n)\nselect k.event_date, knockout, expiry, product, issuer\nfrom knockout k\njoin expiry e on k.event_date=e.event_date\njoin issuer i on k.event_date=i.event_date\njoin product p on k.event_date=p.event_date\nwhere k.event_date dict: } def get_output_mce_path(self): - return "{}/{}".format(self.tmp_path, self.mces_output_file_name) + return f"{self.tmp_path}/{self.mces_output_file_name}" def get_mock_data_impl(self): return self.default_mock_data diff --git a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py index 6f45dcf97f1dd..bb572fe3a7ae5 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py +++ b/metadata-ingestion/tests/integration/powerbi/test_admin_only_api.py @@ -509,6 +509,7 @@ def default_source_config(): }, "env": "DEV", "extract_workspaces_to_containers": False, + "enable_advance_lineage_sql_construct": False, } diff --git a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py index b6cb578217a2c..d1b56c31d4cf6 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_m_parser.py +++ b/metadata-ingestion/tests/integration/powerbi/test_m_parser.py @@ -46,6 +46,7 @@ 'let\n Source = Value.NativeQuery(AmazonRedshift.Database("redshift-url","dev"), "select * from dev.public.category", null, [EnableFolding=true]) \n in Source', 'let\n Source = Databricks.Catalogs("adb-123.azuredatabricks.net", 
"/sql/1.0/endpoints/12345dc91aa25844", [Catalog=null, Database=null]),\n hive_metastore_Database = Source{[Name="hive_metastore",Kind="Database"]}[Data],\n sandbox_revenue_Schema = hive_metastore_Database{[Name="sandbox_revenue",Kind="Schema"]}[Data],\n public_consumer_price_index_Table = sandbox_revenue_Schema{[Name="public_consumer_price_index",Kind="Table"]}[Data],\n #"Renamed Columns" = Table.RenameColumns(public_consumer_price_index_Table,{{"Country", "country"}, {"Metric", "metric"}}),\n #"Inserted Year" = Table.AddColumn(#"Renamed Columns", "ID", each Date.Year([date_id]) + Date.Month([date_id]), Text.Type),\n #"Added Custom" = Table.AddColumn(#"Inserted Year", "Custom", each Text.Combine({Number.ToText(Date.Year([date_id])), Number.ToText(Date.Month([date_id])), [country]})),\n #"Removed Columns" = Table.RemoveColumns(#"Added Custom",{"ID"}),\n #"Renamed Columns1" = Table.RenameColumns(#"Removed Columns",{{"Custom", "ID"}}),\n #"Filtered Rows" = Table.SelectRows(#"Renamed Columns1", each ([metric] = "Consumer Price Index") and (not Number.IsNaN([value])))\nin\n #"Filtered Rows"', "let\n Source = Value.NativeQuery(Snowflake.Databases(\"bu10758.ap-unknown-2.fakecomputing.com\",\"operations_analytics_warehouse_prod\",[Role=\"OPERATIONS_ANALYTICS_MEMBER\"]){[Name=\"OPERATIONS_ANALYTICS\"]}[Data], \"select #(lf)UPPER(REPLACE(AGENT_NAME,'-','')) AS CLIENT_DIRECTOR,#(lf)TIER,#(lf)UPPER(MANAGER),#(lf)TEAM_TYPE,#(lf)DATE_TARGET,#(lf)MONTHID,#(lf)TARGET_TEAM,#(lf)SELLER_EMAIL,#(lf)concat((UPPER(REPLACE(AGENT_NAME,'-',''))), MONTHID) as AGENT_KEY,#(lf)UNIT_TARGET AS SME_Quota,#(lf)AMV_TARGET AS Revenue_Quota,#(lf)SERVICE_QUOTA,#(lf)BL_TARGET,#(lf)SOFTWARE_QUOTA as Software_Quota#(lf)#(lf)from OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT_TARGETS inner join OPERATIONS_ANALYTICS.TRANSFORMED_PROD.V_SME_UNIT #(lf)#(lf)where YEAR_TARGET >= 2022#(lf)and TEAM_TYPE = 'Accounting'#(lf)and TARGET_TEAM = 'Enterprise'#(lf)AND TIER = 'Client Director'\", null, 
[EnableFolding=true])\nin\n Source", + 'let\n Source = Value.NativeQuery(Snowflake.Databases("0DD93C6BD5A6.snowflakecomputing.com","sales_analytics_warehouse_prod",[Role="sales_analytics_member_ad"]){[Name="ORDERING"]}[Data], "SELECT#(lf) DISTINCT#(lf) T5.PRESENTMENT_START_DATE#(lf),T5.PRESENTMENT_END_DATE#(lf),T5.DISPLAY_NAME#(lf),T5.NAME#(tab)#(lf),T5.PROMO_DISPLAY_NAME#(lf),T5.REGION#(lf),T5.ID#(lf),T5.WALKOUT#(lf),T6.DEAL_ID#(lf),T6.TYPE#(lf),T5.FREE_PERIOD#(lf),T6.PRICE_MODIFICATION#(lf)#(lf)FROM#(lf)#(lf)(#(lf) SELECT #(lf) T1.NAME#(lf),DATE(T1.CREATED_AT) as CREATED_AT#(lf),T1.PROMO_CODE#(lf),T1.STATUS#(lf),DATE(T1.UPDATED_AT) as UPDATED_AT#(lf),T1.ID#(lf),T1.DISPLAY_NAME as PROMO_DISPLAY_NAME#(lf),T4.*#(lf)FROM#(lf)(SELECT#(lf) DISTINCT#(lf) NAME#(lf),CREATED_AT#(lf),PROMO_CODE#(lf),STATUS#(lf),UPDATED_AT#(lf),ID#(lf),DISPLAY_NAME#(lf) FROM RAW.PROMOTIONS#(lf)#(lf)) T1#(lf)INNER JOIN#(lf)#(lf) (#(lf) SELECT #(lf) T3.PRODUCT_STATUS#(lf),T3.CODE#(lf),T3.REGION#(lf),T3.DISPLAY_ORDER_SEQUENCE#(lf),T3.PRODUCT_LINE_ID#(lf),T3.DISPLAY_NAME#(lf),T3.PRODUCT_TYPE#(lf),T3.ID as PROD_TBL_ID#(lf),T3.NAME as PROD_TBL_NAME#(lf),DATE(T2.PRESENTMENT_END_DATE) as PRESENTMENT_END_DATE#(lf),T2.PRICE_COMMITMENT_PERIOD#(lf),T2.NAME as SEAL_TBL_NAME#(lf),DATE(T2.CREATED_AT) as SEAL_TBL_CREATED_AT#(lf),T2.DESCRIPTION#(lf),T2.FREE_PERIOD#(lf),T2.WALKOUT#(lf),T2.PRODUCT_CAT_ID#(lf),T2.PROMOTION_ID#(lf),DATE(T2.PRESENTMENT_START_DATE) as PRESENTMENT_START_DATE#(lf),YEAR(T2.PRESENTMENT_START_DATE) as DEAL_YEAR_START#(lf),MONTH(T2.PRESENTMENT_START_DATE) as DEAL_MONTH_START#(lf),T2.DEAL_TYPE#(lf),DATE(T2.UPDATED_AT) as SEAL_TBL_UPDATED_AT#(lf),T2.ID as SEAL_TBL_ID#(lf),T2.STATUS as SEAL_TBL_STATUS#(lf)FROM#(lf)(SELECT#(lf) DISTINCT#(lf) PRODUCT_STATUS#(lf),CODE#(lf),REGION#(lf),DISPLAY_ORDER_SEQUENCE#(lf),PRODUCT_LINE_ID#(lf),DISPLAY_NAME#(lf),PRODUCT_TYPE#(lf),ID #(lf),NAME #(lf) FROM#(lf) RAW.PRODUCTS#(lf)#(lf)) T3#(lf)INNER JOIN#(lf)(#(lf) SELECT#(lf) DISTINCT#(lf) 
PRESENTMENT_END_DATE#(lf),PRICE_COMMITMENT_PERIOD#(lf),NAME#(lf),CREATED_AT#(lf),DESCRIPTION#(lf),FREE_PERIOD#(lf),WALKOUT#(lf),PRODUCT_CAT_ID#(lf),PROMOTION_ID#(lf),PRESENTMENT_START_DATE#(lf),DEAL_TYPE#(lf),UPDATED_AT#(lf),ID#(lf),STATUS#(lf) FROM#(lf) RAW.DEALS#(lf)#(lf)) T2#(lf)ON#(lf)T3.ID = T2.PRODUCT_CAT_ID #(lf)WHERE#(lf)T2.PRESENTMENT_START_DATE >= \'2015-01-01\'#(lf)AND#(lf)T2.STATUS = \'active\'#(lf)#(lf))T4#(lf)ON#(lf)T1.ID = T4.PROMOTION_ID#(lf))T5#(lf)INNER JOIN#(lf)RAW.PRICE_MODIFICATIONS T6#(lf)ON#(lf)T5.SEAL_TBL_ID = T6.DEAL_ID", null, [EnableFolding=true]) \n in \n Source', ] @@ -59,6 +60,7 @@ def get_default_instances( "tenant_id": "fake", "client_id": "foo", "client_secret": "bar", + "enable_advance_lineage_sql_construct": False, **override_config, } ) @@ -763,3 +765,43 @@ def test_sqlglot_parser(): assert lineage[0].column_lineage[i].downstream.table is None assert lineage[0].column_lineage[i].downstream.column == column assert lineage[0].column_lineage[i].upstreams == [] + + +def test_sqlglot_parser_2(): + table: powerbi_data_classes.Table = powerbi_data_classes.Table( + expression=M_QUERIES[25], + name="SALES_TARGET", + full_name="dev.public.sales", + ) + reporter = PowerBiDashboardSourceReport() + + ctx, config, platform_instance_resolver = get_default_instances( + override_config={ + "server_to_platform_instance": { + "0DD93C6BD5A6.snowflakecomputing.com": { + "platform_instance": "sales_deployment", + "env": "PROD", + } + }, + "native_query_parsing": True, + "enable_advance_lineage_sql_construct": True, + } + ) + + lineage: List[resolver.Lineage] = parser.get_upstream_tables( + table, + reporter, + ctx=ctx, + config=config, + platform_instance_resolver=platform_instance_resolver, + ) + + data_platform_tables: List[DataPlatformTable] = lineage[0].upstreams + + assert len(data_platform_tables) == 4 + assert [dpt.urn for dpt in data_platform_tables] == [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_deployment.raw.deals,PROD)", + 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_deployment.raw.price_modifications,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_deployment.raw.products,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales_deployment.raw.promotions,PROD)", + ] diff --git a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py index 2c355d17d3c3d..fafa02d5301e0 100644 --- a/metadata-ingestion/tests/integration/powerbi/test_powerbi.py +++ b/metadata-ingestion/tests/integration/powerbi/test_powerbi.py @@ -634,6 +634,7 @@ def default_source_config(): }, "env": "DEV", "extract_workspaces_to_containers": False, + "enable_advance_lineage_sql_construct": False, } diff --git a/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json b/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json index d00f4b1832418..2f75198d952db 100644 --- a/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json +++ b/metadata-ingestion/tests/integration/salesforce/mock_files/account_fields_soql_response.json @@ -2545,6 +2545,42 @@ "RelationshipName": null, "IsNillable": true }, + { + "attributes": { + "type": "EntityParticle", + "url": "/services/data/v54.0/tooling/sobjects/EntityParticle/Account.Blank_Label" + }, + "QualifiedApiName": "Blank_Label", + "DeveloperName": "Blank_Label", + "Label": null, + "InlineHelpText": "# Help Text", + "FieldDefinition": { + "attributes": { + "type": "FieldDefinition", + "url": "/services/data/v54.0/tooling/sobjects/FieldDefinition/Account.Blank_Label" + }, + "DataType": "Text(80)", + "LastModifiedDate": null, + "LastModifiedBy": null, + "IsIndexed": false, + "ComplianceGroup": null, + "SecurityClassification": null, + "Description": "This is the # description" + }, + "DataType": "string", + "Precision": 0, + "Scale": 0, + "Length": 80, + "Digits": 0, + "IsUnique": 
false, + "IsCompound": false, + "IsComponent": false, + "ReferenceTo": { + "referenceTo": null + }, + "RelationshipName": null, + "IsNillable": true + }, { "attributes": { "type": "EntityParticle", diff --git a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json index 98deb2a2a7ad2..6a3ce983950b0 100644 --- a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json +++ b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json @@ -21,7 +21,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1368,6 +1369,23 @@ "isPartOfKey": false, "jsonProps": "{}" }, + { + "fieldPath": "Blank_Label", + "nullable": true, + "description": "\n\nThis is the # description\n\n\\# Help Text", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "Text(80)", + "recursive": false, + "globalTags": { + "tags": [] + }, + "isPartOfKey": false, + "jsonProps": "{}" + }, { "fieldPath": "Unique_Number", "nullable": true, @@ -1465,7 +1483,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1482,7 +1501,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1504,7 +1524,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1526,7 +1547,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1554,7 +1576,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": 
"salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1951,7 +1974,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1968,7 +1992,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -1985,7 +2010,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2006,7 +2032,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2021,7 +2048,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2036,7 +2064,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2051,7 +2080,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } }, { @@ -2066,7 +2096,8 @@ }, "systemMetadata": { "lastObserved": 1652353200000, - "runId": "salesforce-test" + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/snowflake/common.py b/metadata-ingestion/tests/integration/snowflake/common.py index 74c89bf73bdef..30c4b2bec3a04 100644 --- a/metadata-ingestion/tests/integration/snowflake/common.py +++ b/metadata-ingestion/tests/integration/snowflake/common.py @@ -1,5 +1,7 @@ import json +import random from datetime import datetime, timezone +from unittest.mock import MagicMock from datahub.configuration.common import 
AllowDenyPattern from datahub.configuration.time_window_config import BucketDuration @@ -10,9 +12,158 @@ NUM_VIEWS = 2 NUM_COLS = 10 NUM_OPS = 10 - +NUM_USAGE = 0 FROZEN_TIME = "2022-06-07 17:00:00" +large_sql_query = """WITH object_access_history AS + ( + select + object.value : "objectName"::varchar AS object_name, + object.value : "objectDomain"::varchar AS object_domain, + object.value : "columns" AS object_columns, + query_start_time, + query_id, + user_name + from + ( + select + query_id, + query_start_time, + user_name, + -- Construct the email in the query, should match the Python behavior. + -- The user_email is only used by the email_filter_query. + NVL(USERS.email, CONCAT(LOWER(user_name), '')) AS user_email, + DIRECT_OBJECTS_ACCESSED + from + snowflake.account_usage.access_history + LEFT JOIN + snowflake.account_usage.users USERS + ON user_name = users.name + WHERE + query_start_time >= to_timestamp_ltz(1710720000000, 3) + AND query_start_time < to_timestamp_ltz(1713332173148, 3) + ) + t, + lateral flatten(input => t.DIRECT_OBJECTS_ACCESSED) object + where + NOT RLIKE(object_name,'.*\\.FIVETRAN_.*_STAGING\\..*','i') AND NOT RLIKE(object_name,'.*__DBT_TMP$','i') AND NOT RLIKE(object_name,'.*\\.SEGMENT_[a-f0-9]{8}[-_][a-f0-9]{4}[-_][a-f0-9]{4}[-_][a-f0-9]{4}[-_][a-f0-9]{12}','i') AND NOT RLIKE(object_name,'.*\\.STAGING_.*_[a-f0-9]{8}[-_][a-f0-9]{4}[-_][a-f0-9]{4}[-_][a-f0-9]{4}[-_][a-f0-9]{12}','i') + ) + , + field_access_history AS + ( + select + o.*, + col.value : "columnName"::varchar AS column_name + from + object_access_history o, + lateral flatten(input => o.object_columns) col + ) + , + basic_usage_counts AS + ( + SELECT + object_name, + ANY_VALUE(object_domain) AS object_domain, + DATE_TRUNC('DAY', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, + count(distinct(query_id)) AS total_queries, + count( distinct(user_name) ) AS total_users + FROM + object_access_history + GROUP BY + bucket_start_time, + object_name + ) + , + 
field_usage_counts AS + ( + SELECT + object_name, + column_name, + DATE_TRUNC('DAY', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, + count(distinct(query_id)) AS total_queries + FROM + field_access_history + GROUP BY + bucket_start_time, + object_name, + column_name + ) + , + user_usage_counts AS + ( + SELECT + object_name, + DATE_TRUNC('DAY', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, + count(distinct(query_id)) AS total_queries, + user_name, + ANY_VALUE(users.email) AS user_email + FROM + object_access_history + LEFT JOIN + snowflake.account_usage.users users + ON user_name = users.name + GROUP BY + bucket_start_time, + object_name, + user_name + ) + , + top_queries AS + ( + SELECT + object_name, + DATE_TRUNC('DAY', CONVERT_TIMEZONE('UTC', query_start_time)) AS bucket_start_time, + query_history.query_text AS query_text, + count(distinct(access_history.query_id)) AS total_queries + FROM + object_access_history access_history + LEFT JOIN + ( + SELECT * FROM snowflake.account_usage.query_history + WHERE query_history.start_time >= to_timestamp_ltz(1710720000000, 3) + AND query_history.start_time < to_timestamp_ltz(1713332173148, 3) + ) query_history + ON access_history.query_id = query_history.query_id + GROUP BY + bucket_start_time, + object_name, + query_text + QUALIFY row_number() over ( partition by bucket_start_time, object_name + order by + total_queries desc, query_text asc ) <= 10 + ) + select + basic_usage_counts.object_name AS "OBJECT_NAME", + basic_usage_counts.bucket_start_time AS "BUCKET_START_TIME", + ANY_VALUE(basic_usage_counts.object_domain) AS "OBJECT_DOMAIN", + ANY_VALUE(basic_usage_counts.total_queries) AS "TOTAL_QUERIES", + ANY_VALUE(basic_usage_counts.total_users) AS "TOTAL_USERS", + ARRAY_UNIQUE_AGG(top_queries.query_text) AS "TOP_SQL_QUERIES", + ARRAY_UNIQUE_AGG(OBJECT_CONSTRUCT( 'col', field_usage_counts.column_name, 'total', field_usage_counts.total_queries ) ) AS "FIELD_COUNTS", + 
ARRAY_UNIQUE_AGG(OBJECT_CONSTRUCT( 'user_name', user_usage_counts.user_name, 'email', user_usage_counts.user_email, 'total', user_usage_counts.total_queries ) ) AS "USER_COUNTS" + from + basic_usage_counts basic_usage_counts + left join + top_queries top_queries + on basic_usage_counts.bucket_start_time = top_queries.bucket_start_time + and basic_usage_counts.object_name = top_queries.object_name + left join + field_usage_counts field_usage_counts + on basic_usage_counts.bucket_start_time = field_usage_counts.bucket_start_time + and basic_usage_counts.object_name = field_usage_counts.object_name + left join + user_usage_counts user_usage_counts + on basic_usage_counts.bucket_start_time = user_usage_counts.bucket_start_time + and basic_usage_counts.object_name = user_usage_counts.object_name + where + basic_usage_counts.object_domain in ('Table','External table','View','Materialized view') + and basic_usage_counts.object_name is not null + group by + basic_usage_counts.object_name, + basic_usage_counts.bucket_start_time + order by + basic_usage_counts.bucket_start_time + """ def default_query_results( # noqa: C901 @@ -21,6 +172,7 @@ def default_query_results( # noqa: C901 num_views=NUM_VIEWS, num_cols=NUM_COLS, num_ops=NUM_OPS, + num_usages=NUM_USAGE, ): if query == SnowflakeQuery.current_account(): return [{"CURRENT_ACCOUNT()": "ABC12345"}] @@ -78,7 +230,7 @@ def default_query_results( # noqa: C901 return [ { "TABLE_SCHEMA": "TEST_SCHEMA", - "TABLE_NAME": "TABLE_{}".format(tbl_idx), + "TABLE_NAME": f"TABLE_{tbl_idx}", "TABLE_TYPE": "BASE TABLE", "CREATED": datetime(2021, 6, 8, 0, 0, 0, 0), "LAST_ALTERED": datetime(2021, 6, 8, 0, 0, 0, 0), @@ -93,7 +245,7 @@ def default_query_results( # noqa: C901 return [ { "schema_name": "TEST_SCHEMA", - "name": "VIEW_{}".format(view_idx), + "name": f"VIEW_{view_idx}", "created_on": datetime(2021, 6, 8, 0, 0, 0, 0), "comment": "Comment for View", "text": f"create view view_{view_idx} as select * from table_{view_idx}", @@ -105,13 
+257,13 @@ def default_query_results( # noqa: C901 elif query in [ *[ SnowflakeQuery.columns_for_table( - "TABLE_{}".format(tbl_idx), "TEST_SCHEMA", "TEST_DB" + f"TABLE_{tbl_idx}", "TEST_SCHEMA", "TEST_DB" ) for tbl_idx in range(1, num_tables + 1) ], *[ SnowflakeQuery.columns_for_table( - "VIEW_{}".format(view_idx), "TEST_SCHEMA", "TEST_DB" + f"VIEW_{view_idx}", "TEST_SCHEMA", "TEST_DB" ) for view_idx in range(1, num_views + 1) ], @@ -121,7 +273,7 @@ def default_query_results( # noqa: C901 # "TABLE_CATALOG": "TEST_DB", # "TABLE_SCHEMA": "TEST_SCHEMA", # "TABLE_NAME": "TABLE_{}".format(tbl_idx), - "COLUMN_NAME": "COL_{}".format(col_idx), + "COLUMN_NAME": f"COL_{col_idx}", "ORDINAL_POSITION": col_idx, "IS_NULLABLE": "NO", "DATA_TYPE": "TEXT" if col_idx > 1 else "NUMBER", @@ -165,7 +317,7 @@ def default_query_results( # noqa: C901 [ { "columns": [ - {"columnId": 0, "columnName": "COL_{}".format(col_idx)} + {"columnId": 0, "columnName": f"COL_{col_idx}"} for col_idx in range(1, num_cols + 1) ], "objectDomain": "Table", @@ -174,7 +326,7 @@ def default_query_results( # noqa: C901 }, { "columns": [ - {"columnId": 0, "columnName": "COL_{}".format(col_idx)} + {"columnId": 0, "columnName": f"COL_{col_idx}"} for col_idx in range(1, num_cols + 1) ], "objectDomain": "Table", @@ -183,7 +335,7 @@ def default_query_results( # noqa: C901 }, { "columns": [ - {"columnId": 0, "columnName": "COL_{}".format(col_idx)} + {"columnId": 0, "columnName": f"COL_{col_idx}"} for col_idx in range(1, num_cols + 1) ], "objectDomain": "Table", @@ -196,7 +348,7 @@ def default_query_results( # noqa: C901 [ { "columns": [ - {"columnId": 0, "columnName": "COL_{}".format(col_idx)} + {"columnId": 0, "columnName": f"COL_{col_idx}"} for col_idx in range(1, num_cols + 1) ], "objectDomain": "Table", @@ -205,7 +357,7 @@ def default_query_results( # noqa: C901 }, { "columns": [ - {"columnId": 0, "columnName": "COL_{}".format(col_idx)} + {"columnId": 0, "columnName": f"COL_{col_idx}"} for col_idx in range(1, 
num_cols + 1) ], "objectDomain": "Table", @@ -214,7 +366,7 @@ def default_query_results( # noqa: C901 }, { "columns": [ - {"columnId": 0, "columnName": "COL_{}".format(col_idx)} + {"columnId": 0, "columnName": f"COL_{col_idx}"} for col_idx in range(1, num_cols + 1) ], "objectDomain": "Table", @@ -229,10 +381,10 @@ def default_query_results( # noqa: C901 "columns": [ { "columnId": 0, - "columnName": "COL_{}".format(col_idx), + "columnName": f"COL_{col_idx}", "directSources": [ { - "columnName": "COL_{}".format(col_idx), + "columnName": f"COL_{col_idx}", "objectDomain": "Table", "objectId": 0, "objectName": "TEST_DB.TEST_SCHEMA.TABLE_2", @@ -243,7 +395,7 @@ def default_query_results( # noqa: C901 ], "objectDomain": "Table", "objectId": 0, - "objectName": "TEST_DB.TEST_SCHEMA.TABLE_{}".format(op_idx), + "objectName": f"TEST_DB.TEST_SCHEMA.TABLE_{op_idx}", } ] ), @@ -269,7 +421,30 @@ def default_query_results( # noqa: C901 email_filter=AllowDenyPattern.allow_all(), ) ): - return [] + mock = MagicMock() + mock.__iter__.return_value = [ + { + "OBJECT_NAME": f"TEST_DB.TEST_SCHEMA.TABLE_{i}{random.randint(99, 999) if i > num_tables else ''}", + "BUCKET_START_TIME": datetime(2022, 6, 6, 0, 0, 0, 0).replace( + tzinfo=timezone.utc + ), + "OBJECT_DOMAIN": "Table", + "TOTAL_QUERIES": 10, + "TOTAL_USERS": 1, + "TOP_SQL_QUERIES": json.dumps([large_sql_query for _ in range(10)]), + "FIELD_COUNTS": json.dumps( + [{"col": f"col{c}", "total": 10} for c in range(num_cols)] + ), + "USER_COUNTS": json.dumps( + [ + {"email": f"abc{i}@xyz.com", "user_name": f"abc{i}", "total": 1} + for i in range(10) + ] + ), + } + for i in range(num_usages) + ] + return mock elif query in ( snowflake_query.SnowflakeQuery.table_to_table_lineage_history( 1654473600000, @@ -281,11 +456,11 @@ def default_query_results( # noqa: C901 ): return [ { - "DOWNSTREAM_TABLE_NAME": "TEST_DB.TEST_SCHEMA.TABLE_{}".format(op_idx), + "DOWNSTREAM_TABLE_NAME": f"TEST_DB.TEST_SCHEMA.TABLE_{op_idx}", "UPSTREAM_TABLE_NAME": 
"TEST_DB.TEST_SCHEMA.TABLE_2", "UPSTREAM_TABLE_COLUMNS": json.dumps( [ - {"columnId": 0, "columnName": "COL_{}".format(col_idx)} + {"columnId": 0, "columnName": f"COL_{col_idx}"} for col_idx in range(1, num_cols + 1) ] ), @@ -293,10 +468,10 @@ def default_query_results( # noqa: C901 [ { "columnId": 0, - "columnName": "COL_{}".format(col_idx), + "columnName": f"COL_{col_idx}", "directSources": [ { - "columnName": "COL_{}".format(col_idx), + "columnName": f"COL_{col_idx}", "objectDomain": "Table", "objectId": 0, "objectName": "TEST_DB.TEST_SCHEMA.TABLE_2", @@ -344,7 +519,7 @@ def default_query_results( # noqa: C901 return [ { - "DOWNSTREAM_TABLE_NAME": "TEST_DB.TEST_SCHEMA.TABLE_{}".format(op_idx), + "DOWNSTREAM_TABLE_NAME": f"TEST_DB.TEST_SCHEMA.TABLE_{op_idx}", "DOWNSTREAM_TABLE_DOMAIN": "TABLE", "UPSTREAM_TABLES": json.dumps( [ @@ -434,7 +609,7 @@ def default_query_results( # noqa: C901 ): return [ { - "DOWNSTREAM_TABLE_NAME": "TEST_DB.TEST_SCHEMA.TABLE_{}".format(op_idx), + "DOWNSTREAM_TABLE_NAME": f"TEST_DB.TEST_SCHEMA.TABLE_{op_idx}", "DOWNSTREAM_TABLE_DOMAIN": "TABLE", "UPSTREAM_TABLES": json.dumps( [ @@ -515,7 +690,7 @@ def default_query_results( # noqa: C901 "VIEW_DOMAIN": "VIEW", "VIEW_COLUMNS": json.dumps( [ - {"columnId": 0, "columnName": "COL_{}".format(col_idx)} + {"columnId": 0, "columnName": f"COL_{col_idx}"} for col_idx in range(1, num_cols + 1) ] ), @@ -524,10 +699,10 @@ def default_query_results( # noqa: C901 [ { "columnId": 0, - "columnName": "COL_{}".format(col_idx), + "columnName": f"COL_{col_idx}", "directSources": [ { - "columnName": "COL_{}".format(col_idx), + "columnName": f"COL_{col_idx}", "objectDomain": "Table", "objectId": 0, "objectName": "TEST_DB.TEST_SCHEMA.TABLE_2", diff --git a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py index 65c259e8acdc3..9760ea1a9c72b 100644 --- 
a/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py +++ b/metadata-ingestion/tests/integration/snowflake/test_snowflake_failures.py @@ -169,7 +169,7 @@ def test_snowflake_list_columns_error_causes_pipeline_warning( default_query_results, [ SnowflakeQuery.columns_for_table( - "TABLE_{}".format(tbl_idx), "TEST_SCHEMA", "TEST_DB" + f"TABLE_{tbl_idx}", "TEST_SCHEMA", "TEST_DB" ) for tbl_idx in range(1, NUM_TABLES + 1) ], diff --git a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py index f439a322c2677..4e9b4bee8ce6b 100644 --- a/metadata-ingestion/tests/integration/sql_server/test_sql_server.py +++ b/metadata-ingestion/tests/integration/sql_server/test_sql_server.py @@ -23,9 +23,7 @@ def mssql_runner(docker_compose_runner, pytestconfig): # Run the setup.sql file to populate the database. command = "docker exec testsqlserver /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P 'test!Password' -d master -i /setup/setup.sql" - ret = subprocess.run( - command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE - ) + ret = subprocess.run(command, shell=True, capture_output=True) assert ret.returncode == 0 yield docker_services diff --git a/metadata-ingestion/tests/integration/tableau/setup/embeddedDatasourcesConnection_all.json b/metadata-ingestion/tests/integration/tableau/setup/embeddedDatasourcesConnection_all.json index bdb5013c2ea48..a558eea2e8dd8 100644 --- a/metadata-ingestion/tests/integration/tableau/setup/embeddedDatasourcesConnection_all.json +++ b/metadata-ingestion/tests/integration/tableau/setup/embeddedDatasourcesConnection_all.json @@ -12889,6 +12889,76 @@ "username": "jawadqu@gmail.com" } } + }, + { + "__typename": "EmbeddedDatasource", + "id": "5449c627-7462-4ef7-b492-bda46be068e3", + "name": "New DataSource", + "hasExtracts": true, + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastIncrementalUpdateTime": null, + 
"extractLastUpdateTime": "2018-02-09T00:05:25Z", + "upstreamDatabases": [], + "upstreamTables": [], + "downstreamSheets": [], + "fields": [ + { + "__typename": "ColumnField", + "id": "55a70afe-3e54-492d-80c8-744601ae78cc", + "name": "Program ID", + "description": null, + "isHidden": false, + "folderName": null, + "dataCategory": "NOMINAL", + "role": "DIMENSION", + "dataType": "STRING", + "defaultFormat": null, + "aggregation": null, + "upstreamFields": [], + "upstreamColumns": [ + { + "name": "id", + "table": { + "__typename": "CustomSQLTable", + "id": "c7dd65fb-6e7e-4091-bbde-8c78b34a40f8" + } + } + ] + }, + { + "__typename": "ColumnField", + "id": "636b9454-8786-4773-b94b-8e8f2db7e1a3", + "name": "Name", + "description": null, + "isHidden": false, + "folderName": null, + "dataCategory": "NOMINAL", + "role": "DIMENSION", + "dataType": "STRING", + "defaultFormat": null, + "aggregation": null, + "upstreamFields": [], + "upstreamColumns": [ + { + "name": "name", + "table": { + "__typename": "CustomSQLTable", + "id": "c7dd65fb-6e7e-4091-bbde-8c78b34a40f8" + } + } + ] + } + ], + "upstreamDatasources": [], + "workbook": { + "id": "bd040833-8f66-22c0-1b51-bd4ccf5eef7c", + "name": "Workbook published ds", + "projectName": "default", + "luid": "a059a443-7634-4abf-9e46-d147b99168be", + "owner": { + "username": "jawadqu@gmail.com" + } + } } ], "pageInfo": { diff --git a/metadata-ingestion/tests/integration/tableau/tableau_cll_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_cll_mces_golden.json index 3f481207a03ea..d8c27057872c8 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_cll_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_cll_mces_golden.json @@ -31608,6 +31608,234 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + 
"aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + 
"extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + 
"aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43142,6 +43370,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -44718,6 +44962,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" 
+ }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_extract_all_project_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_extract_all_project_mces_golden.json index 63796970b27c1..250c43b7fc2da 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_extract_all_project_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_extract_all_project_mces_golden.json @@ -31853,6 +31853,234 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] 
+ }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { 
"entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43302,6 +43530,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -44924,6 +45168,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json index 5308f5daebea6..c5417a8d212bc 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_mces_golden.json @@ -31608,6 +31608,234 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43057,6 +43285,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -44633,6 +44877,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + 
"aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_mces_golden_deleted_stateful.json b/metadata-ingestion/tests/integration/tableau/tableau_mces_golden_deleted_stateful.json index b39f7a181f01b..8d7be1c4d0033 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_mces_golden_deleted_stateful.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_mces_golden_deleted_stateful.json @@ -577,6 +577,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": true + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,demo_postgres_instance.dvdrental.public.actor,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_nested_project_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_nested_project_mces_golden.json index 17a7ed91ff36e..5e46b91c207a7 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_nested_project_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_nested_project_mces_golden.json @@ -31853,6 +31853,234 @@ "lastRunId": 
"no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + 
], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43272,6 +43500,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -44894,6 +45138,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": 
"UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_signout_timeout_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_signout_timeout_mces_golden.json index 5308f5daebea6..c5417a8d212bc 100644 --- a/metadata-ingestion/tests/integration/tableau/tableau_signout_timeout_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_signout_timeout_mces_golden.json @@ -31608,6 +31608,234 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + 
"urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, 
+ "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": 
"urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43057,6 +43285,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -44633,6 +44877,31 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9", + "urn": "urn:li:container:94e6e84b66f9ee8c70c22f06cfbad6a9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json index cd957fe388926..5dc8fd9522da4 100644 --- 
a/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json +++ b/metadata-ingestion/tests/integration/tableau/tableau_with_platform_instance_mces_golden.json @@ -12801,7 +12801,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.10c6297d-0dbd-44f1-b1ba-458bea446513,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.22b0b4c3-6b85-713d-a161-5a87fdd78f40,PROD)", "type": "TRANSFORMED" }, { @@ -12809,7 +12809,7 @@ "time": 0, "actor": "urn:li:corpuser:unknown" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.22b0b4c3-6b85-713d-a161-5a87fdd78f40,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.10c6297d-0dbd-44f1-b1ba-458bea446513,PROD)", "type": "TRANSFORMED" } ], @@ -31740,6 +31740,239 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "upstreamLineage", + "aspect": { + "json": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),name)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD),Name)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.c7dd65fb-6e7e-4091-bbde-8c78b34a40f8,PROD),id)" 
+ ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD),Program ID)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:tableau", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/prod/tableau/acryl_site1/default/Workbook published ds" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "hasExtracts": "True", + "extractLastRefreshTime": "2018-02-09T00:05:25Z", + "extractLastUpdateTime": "2018-02-09T00:05:25Z" + }, + "name": "New DataSource", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "test", + "platform": "urn:li:dataPlatform:tableau", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "Program ID", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} 
+ } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "Name", + "nullable": false, + "description": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:DIMENSION" + }, + { + "tag": "urn:li:tag:COLUMNFIELD" + } + ] + }, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Embedded Data Source" + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:ba8a5ac7eb4c6e5edc9b03bf8891be55" + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, + { + "id": 
"urn:li:container:66fa1e14620418276c85f3b552c7ec65", + "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" + }, + { + "id": "urn:li:container:ba8a5ac7eb4c6e5edc9b03bf8891be55", + "urn": "urn:li:container:ba8a5ac7eb4c6e5edc9b03bf8891be55" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", @@ -43350,6 +43583,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.618c87db-5959-338b-bcc7-6f5f4cc0b6c6,PROD)", @@ -45080,6 +45329,35 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.5449c627-7462-4ef7-b492-bda46be068e3,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)", + "urn": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:tableau,acryl_site1)" + }, + { + "id": "urn:li:container:66fa1e14620418276c85f3b552c7ec65", + "urn": "urn:li:container:66fa1e14620418276c85f3b552c7ec65" + }, + { + "id": "urn:li:container:ba8a5ac7eb4c6e5edc9b03bf8891be55", + "urn": "urn:li:container:ba8a5ac7eb4c6e5edc9b03bf8891be55" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1638860400000, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided" + } +}, { 
"entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:tableau,acryl_site1.00cce29f-b561-bb41-3557-8e19660bb5dd,PROD)", diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index cccf79ccbd8e0..57fcb0b6ee49a 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -16,6 +16,7 @@ ) from datahub.configuration.source_common import DEFAULT_ENV +from datahub.emitter.mce_builder import make_schema_field_urn from datahub.ingestion.run.pipeline import Pipeline, PipelineContext from datahub.ingestion.source.tableau import TableauConfig, TableauSource from datahub.ingestion.source.tableau_common import ( @@ -24,10 +25,12 @@ ) from datahub.metadata.com.linkedin.pegasus2avro.dataset import ( DatasetLineageType, + FineGrainedLineage, + FineGrainedLineageDownstreamType, + FineGrainedLineageUpstreamType, UpstreamLineage, ) from datahub.metadata.schema_classes import MetadataChangeProposalClass, UpstreamClass -from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult from tests.test_helpers import mce_helpers, test_connection_helpers from tests.test_helpers.state_helpers import ( get_current_checkpoint_from_pipeline, @@ -569,6 +572,7 @@ def test_lineage_overrides(): assert ( TableauUpstreamReference( "presto_catalog", + "test-database-id", "test-schema", "test-table", "presto", @@ -583,6 +587,7 @@ def test_lineage_overrides(): assert ( TableauUpstreamReference( "presto_catalog", + "test-database-id", "test-schema", "test-table", "presto", @@ -599,6 +604,7 @@ def test_lineage_overrides(): # transform hive urn to presto urn assert ( TableauUpstreamReference( + None, None, "test-schema", "test-table", @@ -614,6 +620,40 @@ def test_lineage_overrides(): ) +def test_database_hostname_to_platform_instance_map(): + enable_logging() + # Simple - snowflake table + 
assert ( + TableauUpstreamReference( + "test-database-name", + "test-database-id", + "test-schema", + "test-table", + "snowflake", + ).make_dataset_urn(env=DEFAULT_ENV, platform_instance_map={}) + == "urn:li:dataset:(urn:li:dataPlatform:snowflake,test-database-name.test-schema.test-table,PROD)" + ) + + # Finding platform instance based off hostname to platform instance mappings + assert ( + TableauUpstreamReference( + "test-database-name", + "test-database-id", + "test-schema", + "test-table", + "snowflake", + ).make_dataset_urn( + env=DEFAULT_ENV, + platform_instance_map={}, + database_hostname_to_platform_instance_map={ + "test-hostname": "test-platform-instance" + }, + database_server_hostname_map={"test-database-id": "test-hostname"}, + ) + == "urn:li:dataset:(urn:li:dataPlatform:snowflake,test-platform-instance.test-database-name.test-schema.test-table,PROD)" + ) + + @freeze_time(FROZEN_TIME) def test_tableau_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph): output_file_name: str = "tableau_mces.json" @@ -672,7 +712,7 @@ def test_tableau_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph) state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2) ) - assert len(difference_dataset_urns) == 34 + assert len(difference_dataset_urns) == 35 deleted_dataset_urns = [ "urn:li:dataset:(urn:li:dataPlatform:tableau,dfe2c02a-54b7-f7a2-39fc-c651da2f6ad8,PROD)", "urn:li:dataset:(urn:li:dataPlatform:tableau,d00f4ba6-707e-4684-20af-69eb47587cc2,PROD)", @@ -708,6 +748,7 @@ def test_tableau_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph) "urn:li:dataset:(urn:li:dataPlatform:external,sample - superstore%2C %28new%29.xls.people,PROD)", "urn:li:dataset:(urn:li:dataPlatform:webdata-direct:servicenowitsm-servicenowitsm,ven01911.sc_cat_item,PROD)", "urn:li:dataset:(urn:li:dataPlatform:tableau,10c6297d-0dbd-44f1-b1ba-458bea446513,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:tableau,5449c627-7462-4ef7-b492-bda46be068e3,PROD)", ] 
assert sorted(deleted_dataset_urns) == sorted(difference_dataset_urns) @@ -805,55 +846,90 @@ def test_tableau_signout_timeout(pytestconfig, tmp_path, mock_datahub_graph): ) -def test_tableau_unsupported_csql(mock_datahub_graph): +def test_tableau_unsupported_csql(): context = PipelineContext(run_id="0", pipeline_name="test_tableau") - context.graph = mock_datahub_graph - config = TableauConfig.parse_obj(config_source_default.copy()) + config_dict = config_source_default.copy() + del config_dict["stateful_ingestion"] + config = TableauConfig.parse_obj(config_dict) config.extract_lineage_from_unsupported_custom_sql_queries = True config.lineage_overrides = TableauLineageOverrides( database_override_map={"production database": "prod"} ) - with mock.patch( - "datahub.ingestion.source.tableau.create_lineage_sql_parsed_result", - return_value=SqlParsingResult( - in_tables=[ - "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_bigquery_project.invent_dw.userdetail,PROD)" - ], - out_tables=[], - column_lineage=None, - ), + def test_lineage_metadata( + lineage, expected_entity_urn, expected_upstream_table, expected_cll ): - source = TableauSource(config=config, ctx=context) - - lineage = source._create_lineage_from_unsupported_csql( - csql_urn="urn:li:dataset:(urn:li:dataPlatform:tableau,09988088-05ad-173c-a2f1-f33ba3a13d1a,PROD)", - csql={ - "query": "SELECT user_id, source, user_source FROM (SELECT *, ROW_NUMBER() OVER (partition BY user_id ORDER BY __partition_day DESC) AS rank_ FROM invent_dw.UserDetail ) source_user WHERE rank_ = 1", - "isUnsupportedCustomSql": "true", - "database": { - "name": "my-bigquery-project", - "connectionType": "bigquery", - }, - }, - out_columns=[], - ) - mcp = cast(MetadataChangeProposalClass, next(iter(lineage)).metadata) - assert mcp.aspect == UpstreamLineage( upstreams=[ UpstreamClass( - dataset="urn:li:dataset:(urn:li:dataPlatform:bigquery,my_bigquery_project.invent_dw.userdetail,PROD)", + dataset=expected_upstream_table, 
type=DatasetLineageType.TRANSFORMED, ) ], - fineGrainedLineages=[], - ) - assert ( - mcp.entityUrn - == "urn:li:dataset:(urn:li:dataPlatform:tableau,09988088-05ad-173c-a2f1-f33ba3a13d1a,PROD)" + fineGrainedLineages=[ + FineGrainedLineage( + upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, + upstreams=[ + make_schema_field_urn(expected_upstream_table, upstream_column) + ], + downstreamType=FineGrainedLineageDownstreamType.FIELD, + downstreams=[ + make_schema_field_urn(expected_entity_urn, downstream_column) + ], + ) + for upstream_column, downstream_column in expected_cll.items() + ], ) + assert mcp.entityUrn == expected_entity_urn + + csql_urn = "urn:li:dataset:(urn:li:dataPlatform:tableau,09988088-05ad-173c-a2f1-f33ba3a13d1a,PROD)" + expected_upstream_table = "urn:li:dataset:(urn:li:dataPlatform:bigquery,my_bigquery_project.invent_dw.UserDetail,PROD)" + expected_cll = { + "user_id": "user_id", + "source": "source", + "user_source": "user_source", + } + + source = TableauSource(config=config, ctx=context) + + lineage = source._create_lineage_from_unsupported_csql( + csql_urn=csql_urn, + csql={ + "query": "SELECT user_id, source, user_source FROM (SELECT *, ROW_NUMBER() OVER (partition BY user_id ORDER BY __partition_day DESC) AS rank_ FROM invent_dw.UserDetail ) source_user WHERE rank_ = 1", + "isUnsupportedCustomSql": "true", + "connectionType": "bigquery", + "database": { + "name": "my_bigquery_project", + "connectionType": "bigquery", + }, + }, + out_columns=[], + ) + test_lineage_metadata( + lineage=lineage, + expected_entity_urn=csql_urn, + expected_upstream_table=expected_upstream_table, + expected_cll=expected_cll, + ) + + # With database as None + lineage = source._create_lineage_from_unsupported_csql( + csql_urn=csql_urn, + csql={ + "query": "SELECT user_id, source, user_source FROM (SELECT *, ROW_NUMBER() OVER (partition BY user_id ORDER BY __partition_day DESC) AS rank_ FROM my_bigquery_project.invent_dw.UserDetail ) source_user WHERE rank_ = 1", + 
"isUnsupportedCustomSql": "true", + "connectionType": "bigquery", + "database": None, + }, + out_columns=[], + ) + test_lineage_metadata( + lineage=lineage, + expected_entity_urn=csql_urn, + expected_upstream_table=expected_upstream_table, + expected_cll=expected_cll, + ) @freeze_time(FROZEN_TIME) diff --git a/metadata-ingestion/tests/performance/README.md b/metadata-ingestion/tests/performance/README.md index 571aa8c88a428..2749f43fa31e0 100644 --- a/metadata-ingestion/tests/performance/README.md +++ b/metadata-ingestion/tests/performance/README.md @@ -1,6 +1,10 @@ # Performance Testing + This module provides a framework for performance testing our ingestion sources. ```bash python -m tests.performance. + +# For example: +python -m tests.performance.snowflake.test_snowflake ``` diff --git a/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py index 124db588fe08a..92ad7b383dc1c 100644 --- a/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py +++ b/metadata-ingestion/tests/performance/bigquery/test_bigquery_usage.py @@ -92,7 +92,5 @@ def run_test(): if __name__ == "__main__": - root_logger = logging.getLogger() - root_logger.setLevel(logging.INFO) - root_logger.addHandler(logging.StreamHandler()) + logging.basicConfig(level=logging.INFO) run_test() diff --git a/metadata-ingestion/tests/performance/databricks/test_unity.py b/metadata-ingestion/tests/performance/databricks/test_unity.py index cc9558f0692ed..6592ffe5198c1 100644 --- a/metadata-ingestion/tests/performance/databricks/test_unity.py +++ b/metadata-ingestion/tests/performance/databricks/test_unity.py @@ -65,7 +65,5 @@ def run_test(): if __name__ == "__main__": - root_logger = logging.getLogger() - root_logger.setLevel(logging.INFO) - root_logger.addHandler(logging.StreamHandler()) + logging.basicConfig(level=logging.INFO) run_test() diff --git a/metadata-ingestion/tests/performance/helpers.py 
b/metadata-ingestion/tests/performance/helpers.py index eb98e53670c96..9bfd9ebc8de0d 100644 --- a/metadata-ingestion/tests/performance/helpers.py +++ b/metadata-ingestion/tests/performance/helpers.py @@ -9,7 +9,7 @@ def workunit_sink(workunits: Iterable[MetadataWorkUnit]) -> Tuple[int, int]: peak_memory_usage = psutil.Process(os.getpid()).memory_info().rss i: int = 0 - for i, wu in enumerate(workunits): + for i, _wu in enumerate(workunits): if i % 10_000 == 0: peak_memory_usage = max( peak_memory_usage, psutil.Process(os.getpid()).memory_info().rss diff --git a/metadata-ingestion/tests/performance/snowflake/__init__.py b/metadata-ingestion/tests/performance/snowflake/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/metadata-ingestion/tests/performance/snowflake/test_snowflake.py b/metadata-ingestion/tests/performance/snowflake/test_snowflake.py new file mode 100644 index 0000000000000..73b7790b62e9e --- /dev/null +++ b/metadata-ingestion/tests/performance/snowflake/test_snowflake.py @@ -0,0 +1,69 @@ +import functools +import logging +import os +from datetime import datetime, timezone +from unittest import mock + +import humanfriendly +import psutil + +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config +from datahub.ingestion.source.snowflake.snowflake_v2 import SnowflakeV2Source +from datahub.utilities.perf_timer import PerfTimer +from tests.integration.snowflake.common import default_query_results +from tests.performance.helpers import workunit_sink + + +def run_test(): + + with mock.patch("snowflake.connector.connect") as mock_connect: + sf_connection = mock.MagicMock() + sf_cursor = mock.MagicMock() + mock_connect.return_value = sf_connection + sf_connection.cursor.return_value = sf_cursor + + sf_cursor.execute.side_effect = functools.partial( + default_query_results, + num_tables=30000, + num_views=10000, + num_cols=30, + num_ops=30, + 
num_usages=500, + ) + + config = SnowflakeV2Config( + account_id="ABC12345.ap-south-1.aws", + username="TST_USR", + password="TST_PWD", + include_technical_schema=False, + include_table_lineage=True, + include_view_lineage=True, + include_usage_stats=True, + include_operational_stats=True, + start_time=datetime(2022, 6, 6, 0, 0, 0, 0).replace(tzinfo=timezone.utc), + end_time=datetime(2022, 6, 7, 7, 17, 0, 0).replace(tzinfo=timezone.utc), + format_sql_queries=True, + ) + ctx = PipelineContext(run_id="test") + source = SnowflakeV2Source(ctx, config) + + pre_mem_usage = psutil.Process(os.getpid()).memory_info().rss + logging.info(f"Test data size: {humanfriendly.format_size(pre_mem_usage)}") + + with PerfTimer() as timer: + workunits = source.get_workunits() + num_workunits, peak_memory_usage = workunit_sink(workunits) + logging.info(f"Workunits Generated: {num_workunits}") + logging.info(f"Seconds Elapsed: {timer.elapsed_seconds():.2f} seconds") + logging.info(source.get_report().as_string()) + + logging.info( + f"Peak Memory Used: {humanfriendly.format_size(peak_memory_usage - pre_mem_usage)}" + ) + logging.info(source.report.aspects) + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + run_test() diff --git a/metadata-ingestion/tests/performance/sql/test_sql_formatter.py b/metadata-ingestion/tests/performance/sql/test_sql_formatter.py new file mode 100644 index 0000000000000..5f783efc559bc --- /dev/null +++ b/metadata-ingestion/tests/performance/sql/test_sql_formatter.py @@ -0,0 +1,25 @@ +import logging + +from datahub.sql_parsing.sqlglot_utils import try_format_query +from datahub.utilities.perf_timer import PerfTimer +from tests.integration.snowflake.common import large_sql_query + + +def run_test() -> None: + N = 500 + + with PerfTimer() as timer: + for i in range(N): + if i % 50 == 0: + print( + f"Running iteration {i}, elapsed time: {timer.elapsed_seconds():.2f} seconds" + ) + + try_format_query.__wrapped__(large_sql_query, 
platform="snowflake") + + print(f"Total time taken for {N} iterations: {timer.elapsed_seconds():.2f} seconds") + + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + run_test() diff --git a/metadata-ingestion/tests/test_helpers/mce_helpers.py b/metadata-ingestion/tests/test_helpers/mce_helpers.py index 1445f402f78b9..9ee4642bfe6eb 100644 --- a/metadata-ingestion/tests/test_helpers/mce_helpers.py +++ b/metadata-ingestion/tests/test_helpers/mce_helpers.py @@ -81,6 +81,7 @@ def check_golden_file( output_path: Union[str, os.PathLike], golden_path: Union[str, os.PathLike], ignore_paths: Sequence[str] = (), + ignore_paths_v2: Sequence[str] = (), ) -> None: update_golden = pytestconfig.getoption("--update-golden-files") copy_output = pytestconfig.getoption("--copy-output-files") @@ -90,6 +91,7 @@ def check_golden_file( update_golden=update_golden, copy_output=copy_output, ignore_paths=ignore_paths, + ignore_paths_v2=ignore_paths_v2, ) @@ -172,20 +174,16 @@ def get_entity_urns(events_file: str) -> Set[str]: def _get_entity_urns(events_list: List[Dict]) -> Set[str]: entity_type = "dataset" # mce urns - mce_urns = set( - [ - _get_element(x, _get_mce_urn_path_spec(entity_type)) - for x in events_list - if _get_filter(mce=True, entity_type=entity_type)(x) - ] - ) - mcp_urns = set( - [ - _get_element(x, _get_mcp_urn_path_spec()) - for x in events_list - if _get_filter(mcp=True, entity_type=entity_type)(x) - ] - ) + mce_urns = { + _get_element(x, _get_mce_urn_path_spec(entity_type)) + for x in events_list + if _get_filter(mce=True, entity_type=entity_type)(x) + } + mcp_urns = { + _get_element(x, _get_mcp_urn_path_spec()) + for x in events_list + if _get_filter(mcp=True, entity_type=entity_type)(x) + } all_urns = mce_urns.union(mcp_urns) return all_urns @@ -266,20 +264,16 @@ def assert_for_each_entity( test_output = load_json_file(file) assert isinstance(test_output, list) # mce urns - mce_urns = set( - [ - _get_element(x, _get_mce_urn_path_spec(entity_type)) 
- for x in test_output - if _get_filter(mce=True, entity_type=entity_type)(x) - ] - ) - mcp_urns = set( - [ - _get_element(x, _get_mcp_urn_path_spec()) - for x in test_output - if _get_filter(mcp=True, entity_type=entity_type)(x) - ] - ) + mce_urns = { + _get_element(x, _get_mce_urn_path_spec(entity_type)) + for x in test_output + if _get_filter(mce=True, entity_type=entity_type)(x) + } + mcp_urns = { + _get_element(x, _get_mcp_urn_path_spec()) + for x in test_output + if _get_filter(mcp=True, entity_type=entity_type)(x) + } all_urns = mce_urns.union(mcp_urns) # there should not be any None urns assert None not in all_urns @@ -376,20 +370,16 @@ def assert_entity_urn_not_like(entity_type: str, regex_pattern: str, file: str) test_output = load_json_file(file) assert isinstance(test_output, list) # mce urns - mce_urns = set( - [ - _get_element(x, _get_mce_urn_path_spec(entity_type)) - for x in test_output - if _get_filter(mce=True, entity_type=entity_type)(x) - ] - ) - mcp_urns = set( - [ - _get_element(x, _get_mcp_urn_path_spec()) - for x in test_output - if _get_filter(mcp=True, entity_type=entity_type)(x) - ] - ) + mce_urns = { + _get_element(x, _get_mce_urn_path_spec(entity_type)) + for x in test_output + if _get_filter(mce=True, entity_type=entity_type)(x) + } + mcp_urns = { + _get_element(x, _get_mcp_urn_path_spec()) + for x in test_output + if _get_filter(mcp=True, entity_type=entity_type)(x) + } all_urns = mce_urns.union(mcp_urns) print(all_urns) matched_urns = [u for u in all_urns if re.match(regex_pattern, u)] @@ -404,20 +394,16 @@ def assert_entity_urn_like(entity_type: str, regex_pattern: str, file: str) -> i test_output = load_json_file(file) assert isinstance(test_output, list) # mce urns - mce_urns = set( - [ - _get_element(x, _get_mce_urn_path_spec(entity_type)) - for x in test_output - if _get_filter(mce=True, entity_type=entity_type)(x) - ] - ) - mcp_urns = set( - [ - _get_element(x, _get_mcp_urn_path_spec()) - for x in test_output - if 
_get_filter(mcp=True, entity_type=entity_type)(x) - ] - ) + mce_urns = { + _get_element(x, _get_mce_urn_path_spec(entity_type)) + for x in test_output + if _get_filter(mce=True, entity_type=entity_type)(x) + } + mcp_urns = { + _get_element(x, _get_mcp_urn_path_spec()) + for x in test_output + if _get_filter(mcp=True, entity_type=entity_type)(x) + } all_urns = mce_urns.union(mcp_urns) print(all_urns) matched_urns = [u for u in all_urns if re.match(regex_pattern, u)] diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py index 0f98054ab1d38..2d43b24e10763 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_helper.py @@ -1,9 +1,11 @@ from typing import List +import datahub.emitter.mce_builder as builder import datahub.metadata.schema_classes as models from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.incremental_lineage_helper import auto_incremental_lineage +from datahub.ingestion.api.source_helpers import auto_workunit from datahub.ingestion.sink.file import write_metadata_file from tests.test_helpers import mce_helpers @@ -143,3 +145,40 @@ def test_incremental_column_lineage(tmp_path, pytestconfig): mce_helpers.check_golden_file( pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file ) + + +def test_incremental_lineage_pass_through(tmp_path, pytestconfig): + test_resources_dir = pytestconfig.rootpath / "tests/unit/api/source_helpers" + test_file = tmp_path / "test_incremental_lineage_pass_through.json" + golden_file = test_resources_dir / "test_incremental_lineage_pass_through.json" + + urn = builder.make_dataset_urn("bigquery", "downstream") + dataset_mce = 
builder.make_lineage_mce( + [ + builder.make_dataset_urn("bigquery", "upstream1"), + builder.make_dataset_urn("bigquery", "upstream2"), + ], + urn, + ) + props = models.DatasetPropertiesClass(name="downstream") + assert isinstance(dataset_mce.proposedSnapshot, models.DatasetSnapshotClass) + dataset_mce.proposedSnapshot.aspects.append(props) + + ownership = MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=models.OwnershipClass(owners=[]), + systemMetadata=system_metadata, + ) + + processed_wus = auto_incremental_lineage( + incremental_lineage=True, + stream=auto_workunit([dataset_mce, ownership]), + ) + + write_metadata_file( + test_file, + [wu.metadata for wu in processed_wus], + ) + mce_helpers.check_golden_file( + pytestconfig=pytestconfig, output_path=test_file, golden_path=golden_file + ) diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_pass_through.json b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_pass_through.json new file mode 100644 index 0000000000000..61913a7b7a91a --- /dev/null +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_incremental_lineage_pass_through.json @@ -0,0 +1,73 @@ +[ +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,downstream,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "downstream", + "tags": [] + } + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,downstream,PROD)", + "changeType": "PATCH", + "aspectName": "upstreamLineage", + "aspect": { + "json": [ + { + "op": "add", + "path": "/upstreams/urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream1,PROD)", + "value": { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream1,PROD)", + 
"type": "TRANSFORMED" + } + }, + { + "op": "add", + "path": "/upstreams/urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream2,PROD)", + "value": { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream2,PROD)", + "type": "TRANSFORMED" + } + } + ] + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,downstream,PROD)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "run-id", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py index f28c7167ca319..d995404ad69a5 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py @@ -201,7 +201,7 @@ def test_auto_browse_path_v2_by_container_hierarchy(telemetry_ping_mock): assert paths["i"] == _make_container_browse_path_entries(["one", "a"]) # Check urns emitted on demand -- not all at end - for urn in set(wu.get_urn() for wu in new_wus): + for urn in {wu.get_urn() for wu in new_wus}: try: idx = next( i diff --git a/metadata-ingestion/tests/unit/config/test_config_loader.py b/metadata-ingestion/tests/unit/config/test_config_loader.py index f9a4076e18363..25ee289ec4e4e 100644 --- a/metadata-ingestion/tests/unit/config/test_config_loader.py +++ b/metadata-ingestion/tests/unit/config/test_config_loader.py @@ -52,7 +52,7 @@ "VAR1": "stuff1", "VAR2": "stuff2", }, - set(["VAR1", "UNSET_VAR3", "VAR2"]), + {"VAR1", "UNSET_VAR3", "VAR2"}, ), ( "tests/unit/config/complex_variable_expansion.yml", @@ -107,22 +107,20 
@@ "VAR10": "stuff10", "VAR11": "stuff11", }, - set( - [ - "VAR1", - "VAR2", - "VAR3", - "VAR4", - "VAR5", - "VAR6", - "VAR7", - "VAR8", - "VAR9", - "VAR10", - # VAR11 is escaped and hence not referenced - "VARNONEXISTENT", - ] - ), + { + "VAR1", + "VAR2", + "VAR3", + "VAR4", + "VAR5", + "VAR6", + "VAR7", + "VAR8", + "VAR9", + "VAR10", + # VAR11 is escaped and hence not referenced + "VARNONEXISTENT", + }, ), ], ) diff --git a/metadata-ingestion/tests/unit/glue/glue_delta_mces_golden.json b/metadata-ingestion/tests/unit/glue/glue_delta_mces_golden.json new file mode 100644 index 0000000000000..f3d4812b79124 --- /dev/null +++ b/metadata-ingestion/tests/unit/glue/glue_delta_mces_golden.json @@ -0,0 +1,1548 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:275c7ea5ecf956fd8d45e14228757a8a", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "glue", + "instance": "delta_platform_instance", + "env": "PROD", + "database": "delta-database" + }, + "name": "delta-database", + "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/delta-database" + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:275c7ea5ecf956fd8d45e14228757a8a", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:275c7ea5ecf956fd8d45e14228757a8a", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:glue", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:glue,delta_platform_instance)" + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:275c7ea5ecf956fd8d45e14228757a8a", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + } +}, +{ + "proposedSnapshot": { + 
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:glue,delta_platform_instance.delta-database.delta_table_1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "spark.sql.sources.provider": "delta", + "spark.sql.sources.schema.numParts": "3", + "spark.sql.sources.schema.part.0": "{\"type\":\"struct\",\"fields\":[{\"name\":\"ecg_session_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"page_type_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_pltfrm_id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_dvic_id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ga_vstr_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"src_ad_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_ad_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_user_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"src_categ_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_categ_ref_id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_geo_ref_id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"src_loc_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_region_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_cntry_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_city_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ga_prfl_id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_ga_prfl_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ga_vst_id\",\"type\":\"integer\",\"nullable\":true,\"metada
ta\":{}},{\"name\":\"app_vrsn_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"chnl_group\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_trffc_chnl_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"vst_mdm_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"vst_src_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"vst_src_cmpgn_code\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"vst_src_cmpgn_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"vst_src_cntnt_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"vst_src_ad_kywrd_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_vst_drtn_num\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"home_page_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"session_start_time_num\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"brwsr_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"brwsr_vrsn_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_is_user_login_flag\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"lang_code\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_is_direct_flag\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"os_vrsn_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"os_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"host_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"vst_src_is_direct_flag\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_is_session_flag\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"app_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"encypted_user_id\",\"ty
pe\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"decypted_user_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"encrptd_email\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"page_path_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"scrn_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"socl_engmnt_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"vst_src_path_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_user_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_cmpgn_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_adgroup_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_crtv_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_criteria_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_criteria_param_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_page_num\",\"type\":\"long\",\"nullable\":true,\"metadata\":{", + "spark.sql.sources.schema.part.1": 
"}},{\"name\":\"adword_slot_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_click_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_ntwrk_type\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_is_videoad_flag\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"adword_criteria_boomuserlist_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"brwsr_size_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"mbl_dvic_brand_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"mbl_dvic_model_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"mbl_input_slctr_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"mbl_dvic_info_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"mbl_dvic_mkt_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"flash_vrsn_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"is_java_enabled_flag\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"scrn_color_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"scrn_rsln_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_cntint_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_subcntint_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_metro_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_city_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_ntwrk_dmn_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_ltitd\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_lngtd\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"geo_ntwrk_loc_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name
\":\"session_ab_test_group_txt\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_vst_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_new_vstr_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_vst_num\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_hit_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_pv_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_srp_pv_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_unq_srp_pv_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_vip_pv_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_unq_vip_pv_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_scrn_view_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_uniq_scrn_view_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_scrn_drtn_num\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_bnc_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_trxn_cnt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_trxn_rev_amt\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"ga_session_list_array\",\"type\":{\"type\":\"array\",\"elementType\":{\"type\":\"struct\",\"fields\":[{\"name\":\"clsfd_session_id\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_sum_dt\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]},\"containsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"sess_cd\",\"type\":{\"type\":\"map\",\"keyType\":\"integer\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"lp_hit_cd\",\"type\":{\"type\":\"map\",\"keyType\":\"integer\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":tr
ue,\"metadata\":{}},{\"name\":\"ext_map\",\"type\":{\"type\":\"map\",\"keyType\":\"integer\",\"valueType\":\"string\",\"valueContainsNull\":true},\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_cntry_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cre_date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"cre_user\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"upd_date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"upd_user\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"clsfd_site_id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"", + "spark.sql.sources.schema.part.2": "name\":\"ecg_session_start_dt\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}}]}", + "Location": "s3://crawler-public-us-west-2/delta/" + }, + "name": "delta_table_1", + "qualifiedName": "arn:aws:glue:us-west-2:123412341234:table/delta-database/delta_table_1", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "delta-database.delta_table_1", + "platform": "urn:li:dataPlatform:glue", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "[version=2.0].[type=string].ecg_session_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].page_type_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, 
+ "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].clsfd_pltfrm_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].clsfd_dvic_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].ga_vstr_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].src_ad_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_ad_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_user_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { 
+ "fieldPath": "[version=2.0].[type=string].src_categ_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].clsfd_categ_ref_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].clsfd_geo_ref_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].src_loc_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].geo_region_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].geo_cntry_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": 
"[version=2.0].[type=string].geo_city_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].ga_prfl_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].clsfd_ga_prfl_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].ga_vst_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].app_vrsn_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].chnl_group", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].clsfd_trffc_chnl_name", + "nullable": 
true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].vst_mdm_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].vst_src_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].vst_src_cmpgn_code", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].vst_src_cmpgn_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].vst_src_cntnt_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].vst_src_ad_kywrd_txt", + "nullable": true, + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_vst_drtn_num", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].home_page_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].session_start_time_num", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].brwsr_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].brwsr_vrsn_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].clsfd_is_user_login_flag", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": 
{} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].lang_code", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].clsfd_is_direct_flag", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].os_vrsn_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].os_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].host_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].vst_src_is_direct_flag", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": 
false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].clsfd_is_session_flag", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].app_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].encypted_user_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].decypted_user_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].encrptd_email", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].page_path_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": 
"{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].scrn_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].socl_engmnt_type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].vst_src_path_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].adword_user_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].adword_cmpgn_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].adword_adgroup_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + 
"fieldPath": "[version=2.0].[type=long].adword_crtv_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].adword_criteria_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].adword_criteria_param_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].adword_page_num", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].adword_slot_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].adword_click_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": 
"[version=2.0].[type=string].adword_ntwrk_type", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].adword_is_videoad_flag", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].adword_criteria_boomuserlist_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].brwsr_size_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].mbl_dvic_brand_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].mbl_dvic_model_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": 
"[version=2.0].[type=string].mbl_input_slctr_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].mbl_dvic_info_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].mbl_dvic_mkt_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].flash_vrsn_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].is_java_enabled_flag", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].scrn_color_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": 
"[version=2.0].[type=string].scrn_rsln_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].geo_cntint_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].geo_subcntint_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].geo_metro_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].geo_city_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].geo_ntwrk_dmn_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].geo_ltitd", + 
"nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].geo_lngtd", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].geo_ntwrk_loc_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].session_ab_test_group_txt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_vst_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_new_vstr_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_vst_num", + "nullable": true, + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_hit_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_pv_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_srp_pv_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_unq_srp_pv_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_vip_pv_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_unq_vip_pv_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": 
"long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_scrn_view_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_uniq_scrn_view_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_scrn_drtn_num", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_bnc_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_trxn_cnt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=long].clsfd_trxn_rev_amt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "long", + "recursive": false, + "isPartOfKey": false, + "jsonProps": 
"{\"native_data_type\": \"long\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].ga_session_list_array", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.ArrayType": { + "nestedType": [ + "record" + ] + } + } + }, + "nativeDataType": "array>", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"array>\"}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].ga_session_list_array.[type=string].clsfd_session_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=struct].[type=array].[type=struct].ga_session_list_array.[type=int].clsfd_sum_dt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "date", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"logicalType\": \"date\", \"native_data_type\": \"date\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=null].sess_cd", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=null].lp_hit_cd", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=null].ext_map", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + 
"nativeDataType": "", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].clsfd_cntry_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].cre_date", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "date", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"logicalType\": \"date\", \"native_data_type\": \"date\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].cre_user", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].upd_date", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "date", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"logicalType\": \"date\", \"native_data_type\": \"date\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=string].upd_user", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"string\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].clsfd_site_id", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "integer", + "recursive": false, + 
"isPartOfKey": false, + "jsonProps": "{\"native_data_type\": \"integer\", \"_nullable\": true}" + }, + { + "fieldPath": "[version=2.0].[type=int].ecg_session_start_dt", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "date", + "recursive": false, + "isPartOfKey": false, + "jsonProps": "{\"logicalType\": \"date\", \"native_data_type\": \"date\", \"_nullable\": true}" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:glue", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:glue,delta_platform_instance)" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:owner", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,delta_platform_instance.delta-database.delta_table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,delta_platform_instance.delta-database.delta_table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:275c7ea5ecf956fd8d45e14228757a8a" + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/glue/glue_malformed_delta_mces_golden.json b/metadata-ingestion/tests/unit/glue/glue_malformed_delta_mces_golden.json new file mode 100644 index 0000000000000..015daaa27162f --- /dev/null +++ b/metadata-ingestion/tests/unit/glue/glue_malformed_delta_mces_golden.json @@ -0,0 +1,128 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:275c7ea5ecf956fd8d45e14228757a8a", + "changeType": "UPSERT", + 
"aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "glue", + "instance": "delta_platform_instance", + "env": "PROD", + "database": "delta-database" + }, + "name": "delta-database", + "qualifiedName": "arn:aws:glue:us-west-2:123412341234:database/delta-database" + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:275c7ea5ecf956fd8d45e14228757a8a", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:275c7ea5ecf956fd8d45e14228757a8a", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:glue", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:glue,delta_platform_instance)" + } + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:275c7ea5ecf956fd8d45e14228757a8a", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:glue,delta_platform_instance.delta-database.delta_table_1,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "spark.sql.sources.provider": "delta", + "spark.sql.sources.schema.numParts": "1", + "spark.sql.sources.schema.part.0": "this is totally wrong!", + "Location": "s3://crawler-public-us-west-2/delta/" + }, + "name": "delta_table_1", + "qualifiedName": "arn:aws:glue:us-west-2:123412341234:table/delta-database/delta_table_1", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.DataPlatformInstance": { + "platform": "urn:li:dataPlatform:glue", + "instance": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:glue,delta_platform_instance)" + } + }, + { + "com.linkedin.pegasus2avro.common.Ownership": { + "owners": [ + { + "owner": "urn:li:corpuser:owner", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + } + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,delta_platform_instance.delta-database.delta_table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,delta_platform_instance.delta-database.delta_table_1,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:275c7ea5ecf956fd8d45e14228757a8a" + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unnamed_column_udf.json b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unnamed_column_udf.json new file mode 100644 index 0000000000000..2938298280fea --- /dev/null +++ b/metadata-ingestion/tests/unit/sql_parsing/goldens/test_snowflake_unnamed_column_udf.json @@ -0,0 +1,58 @@ +{ + "query_type": "SELECT", + "query_type_props": {}, + "query_fingerprint": "f2ac7c7d2236fce51b0d7c2b3f0e1a3c26cf6f26566b77f1d9084b7ab9c1d021", + "in_tables": [ + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table_b,PROD)" + ], + "out_tables": [], + "column_lineage": [ + { + "downstream": { + "table": null, + "column": "id", + "column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table,PROD)", + "column": "id" + } + ] + }, + { + "downstream": { + "table": 
null, + "column": "name", + "column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table_b,PROD)", + "column": "name" + } + ] + }, + { + "downstream": { + "table": null, + "column": "address", + "column_type": null, + "native_column_type": null + }, + "upstreams": [ + { + "table": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_db.my_schema.my_table_b,PROD)", + "column": "address" + } + ] + } + ], + "debug_info": { + "confidence": 0.2, + "generalized_statement": "SELECT A.ID, B.NAME, PARSE_JSON(B.MY_JSON) AS :userInfo, B.ADDRESS FROM my_db.my_schema.my_table AS A LEFT JOIN my_db.my_schema.my_table_B AS B ON A.ID = B.ID" + } +} \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/sql_parsing/test_sql_detach.py b/metadata-ingestion/tests/unit/sql_parsing/test_sql_detach.py index b45e2d0eff693..759f5a1d19242 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sql_detach.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sql_detach.py @@ -12,7 +12,7 @@ def test_detach_ctes_simple(): assert ( detached - == "WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN _my_cte_table ON table2.id = _my_cte_table.id" + == "SELECT * FROM table2 JOIN _my_cte_table ON table2.id = _my_cte_table.id" ) @@ -27,7 +27,7 @@ def test_detach_ctes_with_alias(): assert ( detached - == "WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN _my_cte_table AS tablealias ON table2.id = tablealias.id" + == "SELECT * FROM table2 JOIN _my_cte_table AS tablealias ON table2.id = tablealias.id" ) @@ -42,5 +42,5 @@ def test_detach_ctes_with_multipart_replacement(): assert ( detached - == "WITH __cte_0 AS (SELECT * FROM table1) SELECT * FROM table2 JOIN my_db.my_schema.my_table ON table2.id = my_db.my_schema.my_table.id" + == "SELECT * FROM table2 JOIN my_db.my_schema.my_table ON table2.id = my_db.my_schema.my_table.id" ) diff --git 
a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py index c1d629bc6706e..54f8bce7c32df 100644 --- a/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py +++ b/metadata-ingestion/tests/unit/sql_parsing/test_sqlglot_lineage.py @@ -1108,3 +1108,20 @@ def test_redshift_system_automove() -> None: default_db="my_db", expected_file=RESOURCE_DIR / "test_redshift_system_automove.json", ) + + +def test_snowflake_with_unnamed_column_from_udf_call() -> None: + assert_sql_result( + """SELECT + A.ID, + B.NAME, + PARSE_JSON(B.MY_JSON) AS :userInfo, + B.ADDRESS +FROM my_db.my_schema.my_table AS A +LEFT JOIN my_db.my_schema.my_table_B AS B + ON A.ID = B.ID +""", + dialect="snowflake", + default_db="my_db", + expected_file=RESOURCE_DIR / "test_snowflake_unnamed_column_udf.json", + ) diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state.json b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state.json index 4e62492918bfb..fcf73d9614f24 100644 --- a/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state.json +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state.json @@ -16,8 +16,8 @@ "config": "", "state": { "formatVersion": "1.0", - "serde": "base85-bz2-json", - "payload": "LRx4!F+o`-Q(1w>5G4QrYoCBnWH=B60MH7jr`{?c0BA?5L)2-AGyu>6y;V<9hz%Mv0Bt1*)lOMzr>a0|Iq-4VtTsYONQsFPLn1EpdQS;HIy|&CvSAlRvAJwmtCEM+Rx(v_)~sVvkx3V@WX4O`=losC6yZWb2OL0@" + "serde": "utf-8", + "payload": "{\"urns\": [\"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)\", \"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset2,PROD)\", \"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)\"]}" }, "runId": "dummy-test-stateful-ingestion" } diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state_after_deleted.json 
b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state_after_deleted.json index 6ecd43483d948..5477af72a1939 100644 --- a/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state_after_deleted.json +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state_after_deleted.json @@ -16,8 +16,8 @@ "config": "", "state": { "formatVersion": "1.0", - "serde": "base85-bz2-json", - "payload": "LRx4!F+o`-Q(317h`0a%NgsevWH1l}0MH7jr`{?c0B9vdZ9%mLfYG4P6;f$2G%+v`9z&~6n|e(JEPC2_Iix~CA_im)jR-zsjEK*yo|HQz#IUUHtf@DYVEme-lUW9{Xmmt~y^2jCdyY95az!{$kf#WUxB" + "serde": "utf-8", + "payload": "{\"urns\": [\"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)\", \"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset2,PROD)\"]}" }, "runId": "dummy-test-stateful-ingestion" } diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state_after_deleted_failure.json b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state_after_deleted_failure.json new file mode 100644 index 0000000000000..fcf73d9614f24 --- /dev/null +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state_after_deleted_failure.json @@ -0,0 +1,26 @@ +[ +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(file,dummy_stateful,prod),default_stale_entity_removal)", + "changeType": "UPSERT", + "aspectName": "datahubIngestionCheckpoint", + "aspect": { + "json": { + "timestampMillis": 1586847600000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "pipelineName": "dummy_stateful", + "platformInstanceId": "", + "config": "", + "state": { + "formatVersion": "1.0", + "serde": "utf-8", + "payload": "{\"urns\": [\"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)\", \"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset2,PROD)\", 
\"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)\"]}" + }, + "runId": "dummy-test-stateful-ingestion" + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state_failure.json b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state_failure.json new file mode 100644 index 0000000000000..fcf73d9614f24 --- /dev/null +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_checkpoint_state_failure.json @@ -0,0 +1,26 @@ +[ +{ + "entityType": "dataJob", + "entityUrn": "urn:li:dataJob:(urn:li:dataFlow:(file,dummy_stateful,prod),default_stale_entity_removal)", + "changeType": "UPSERT", + "aspectName": "datahubIngestionCheckpoint", + "aspect": { + "json": { + "timestampMillis": 1586847600000, + "partitionSpec": { + "type": "FULL_TABLE", + "partition": "FULL_TABLE_SNAPSHOT" + }, + "pipelineName": "dummy_stateful", + "platformInstanceId": "", + "config": "", + "state": { + "formatVersion": "1.0", + "serde": "utf-8", + "payload": "{\"urns\": [\"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)\", \"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset2,PROD)\", \"urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)\"]}" + }, + "runId": "dummy-test-stateful-ingestion" + } + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_stateful_ingestion_after_deleted_failure.json b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_stateful_ingestion_after_deleted_failure.json new file mode 100644 index 0000000000000..a1f5132cac0a3 --- /dev/null +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_stateful_ingestion_after_deleted_failure.json @@ -0,0 +1,34 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)", + "changeType": "UPSERT", + "aspectName": 
"status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_stateful_ingestion_failure.json b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_stateful_ingestion_failure.json new file mode 100644 index 0000000000000..4a77651c93066 --- /dev/null +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/golden_test_stateful_ingestion_failure.json @@ -0,0 +1,50 @@ +[ +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset1,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset2,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 
1586847600000, + "runId": "dummy-test-stateful-ingestion", + "lastRunId": "no-run-id-provided" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py index 2b811d5e5e3a3..50d9b86b3a017 100644 --- a/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py +++ b/metadata-ingestion/tests/unit/stateful_ingestion/state/test_stateful_ingestion.py @@ -1,7 +1,9 @@ from dataclasses import dataclass, field as dataclass_field from typing import Any, Dict, Iterable, List, Optional, cast +from unittest import mock import pydantic +import pytest from freezegun import freeze_time from pydantic import Field @@ -56,6 +58,10 @@ class DummySourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin): stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = pydantic.Field( default=None, description="Dummy source Ingestion Config." ) + report_failure: bool = Field( + default=False, + description="Should this dummy source report a failure.", + ) class DummySource(StatefulIngestionSourceBase): @@ -67,7 +73,7 @@ class DummySource(StatefulIngestionSourceBase): reporter: DummySourceReport def __init__(self, config: DummySourceConfig, ctx: PipelineContext): - super(DummySource, self).__init__(config, ctx) + super().__init__(config, ctx) self.source_config = config self.reporter = DummySourceReport() # Create and register the stateful ingestion use-case handler. 
@@ -103,10 +109,23 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: aspect=StatusClass(removed=False), ).as_workunit() + if self.source_config.report_failure: + self.reporter.report_failure("Dummy error", "Error") + def get_report(self) -> SourceReport: return self.reporter +@pytest.fixture(scope="module") +def mock_generic_checkpoint_state(): + with mock.patch( + "datahub.ingestion.source.state.entity_removal_state.GenericCheckpointState" + ) as mock_checkpoint_state: + checkpoint_state = mock_checkpoint_state.return_value + checkpoint_state.serde.return_value = "utf-8" + yield mock_checkpoint_state + + @freeze_time(FROZEN_TIME) def test_stateful_ingestion(pytestconfig, tmp_path, mock_time): # test stateful ingestion using dummy source @@ -148,80 +167,209 @@ def test_stateful_ingestion(pytestconfig, tmp_path, mock_time): }, } - pipeline_run1 = None - pipeline_run1_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( # type: ignore - base_pipeline_config # type: ignore + with mock.patch( + "datahub.ingestion.source.state.stale_entity_removal_handler.StaleEntityRemovalHandler._get_state_obj" + ) as mock_state: + mock_state.return_value = GenericCheckpointState(serde="utf-8") + pipeline_run1 = None + pipeline_run1_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( # type: ignore + base_pipeline_config # type: ignore + ) + pipeline_run1_config["sink"]["config"][ + "filename" + ] = f"{tmp_path}/{output_file_name}" + pipeline_run1 = Pipeline.create(pipeline_run1_config) + pipeline_run1.run() + pipeline_run1.raise_from_status() + pipeline_run1.pretty_print_summary() + + # validate both dummy source mces and checkpoint state mces files + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / output_file_name, + golden_path=f"{test_resources_dir}/{golden_file_name}", + ) + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / state_file_name, + golden_path=f"{test_resources_dir}/{golden_state_file_name}", + ) + 
checkpoint1 = get_current_checkpoint_from_pipeline(pipeline_run1) + assert checkpoint1 + assert checkpoint1.state + + with mock.patch( + "datahub.ingestion.source.state.stale_entity_removal_handler.StaleEntityRemovalHandler._get_state_obj" + ) as mock_state: + mock_state.return_value = GenericCheckpointState(serde="utf-8") + pipeline_run2 = None + pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore + pipeline_run2_config["source"]["config"]["dataset_patterns"] = { + "allow": ["dummy_dataset1", "dummy_dataset2"], + } + pipeline_run2_config["sink"]["config"][ + "filename" + ] = f"{tmp_path}/{output_file_name_after_deleted}" + pipeline_run2 = Pipeline.create(pipeline_run2_config) + pipeline_run2.run() + pipeline_run2.raise_from_status() + pipeline_run2.pretty_print_summary() + + # validate both updated dummy source mces and checkpoint state mces files after deleting dataset + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / output_file_name_after_deleted, + golden_path=f"{test_resources_dir}/{golden_file_name_after_deleted}", + ) + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / state_file_name, + golden_path=f"{test_resources_dir}/{golden_state_file_name_after_deleted}", + ) + checkpoint2 = get_current_checkpoint_from_pipeline(pipeline_run2) + assert checkpoint2 + assert checkpoint2.state + + # Validate that all providers have committed successfully. + validate_all_providers_have_committed_successfully( + pipeline=pipeline_run1, expected_providers=1 + ) + validate_all_providers_have_committed_successfully( + pipeline=pipeline_run2, expected_providers=1 + ) + + # Perform all assertions on the states. 
The deleted table should not be + # part of the second state + state1 = cast(GenericCheckpointState, checkpoint1.state) + state2 = cast(GenericCheckpointState, checkpoint2.state) + + difference_dataset_urns = list( + state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2) + ) + # the difference in dataset urns is the dataset which is not allowed to ingest + assert len(difference_dataset_urns) == 1 + deleted_dataset_urns: List[str] = [ + "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)", + ] + assert sorted(deleted_dataset_urns) == sorted(difference_dataset_urns) + + +@freeze_time(FROZEN_TIME) +def test_stateful_ingestion_failure(pytestconfig, tmp_path, mock_time): + # test stateful ingestion using dummy source with pipeline execution failed in second ingestion + state_file_name: str = "checkpoint_state_mces_failure.json" + golden_state_file_name: str = "golden_test_checkpoint_state_failure.json" + golden_state_file_name_after_deleted: str = ( + "golden_test_checkpoint_state_after_deleted_failure.json" ) - pipeline_run1_config["sink"]["config"][ - "filename" - ] = f"{tmp_path}/{output_file_name}" - pipeline_run1 = Pipeline.create(pipeline_run1_config) - pipeline_run1.run() - pipeline_run1.raise_from_status() - pipeline_run1.pretty_print_summary() - - # validate both dummy source mces and checkpoint state mces files - mce_helpers.check_golden_file( - pytestconfig, - output_path=tmp_path / output_file_name, - golden_path=f"{test_resources_dir}/{golden_file_name}", + output_file_name: str = "dummy_mces_failure.json" + golden_file_name: str = "golden_test_stateful_ingestion_failure.json" + output_file_name_after_deleted: str = ( + "dummy_mces_stateful_after_deleted_failure.json" ) - mce_helpers.check_golden_file( - pytestconfig, - output_path=tmp_path / state_file_name, - golden_path=f"{test_resources_dir}/{golden_state_file_name}", + golden_file_name_after_deleted: str = ( + "golden_test_stateful_ingestion_after_deleted_failure.json" ) - 
checkpoint1 = get_current_checkpoint_from_pipeline(pipeline_run1) - assert checkpoint1 - assert checkpoint1.state - - pipeline_run2 = None - pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore - pipeline_run2_config["source"]["config"]["dataset_patterns"] = { - "allow": ["dummy_dataset1", "dummy_dataset2"], + + test_resources_dir = pytestconfig.rootpath / "tests/unit/stateful_ingestion/state" + + base_pipeline_config = { + "run_id": "dummy-test-stateful-ingestion", + "pipeline_name": "dummy_stateful", + "source": { + "type": "tests.unit.stateful_ingestion.state.test_stateful_ingestion.DummySource", + "config": { + "stateful_ingestion": { + "enabled": True, + "remove_stale_metadata": True, + "state_provider": { + "type": "file", + "config": { + "filename": f"{tmp_path}/{state_file_name}", + }, + }, + }, + }, + }, + "sink": { + "type": "file", + "config": {}, + }, } - pipeline_run2_config["sink"]["config"][ - "filename" - ] = f"{tmp_path}/{output_file_name_after_deleted}" - pipeline_run2 = Pipeline.create(pipeline_run2_config) - pipeline_run2.run() - pipeline_run2.raise_from_status() - pipeline_run2.pretty_print_summary() - - # validate both updated dummy source mces and checkpoint state mces files after deleting dataset - mce_helpers.check_golden_file( - pytestconfig, - output_path=tmp_path / output_file_name_after_deleted, - golden_path=f"{test_resources_dir}/{golden_file_name_after_deleted}", - ) - mce_helpers.check_golden_file( - pytestconfig, - output_path=tmp_path / state_file_name, - golden_path=f"{test_resources_dir}/{golden_state_file_name_after_deleted}", - ) - checkpoint2 = get_current_checkpoint_from_pipeline(pipeline_run2) - assert checkpoint2 - assert checkpoint2.state - # Validate that all providers have committed successfully. 
- validate_all_providers_have_committed_successfully( - pipeline=pipeline_run1, expected_providers=1 - ) - validate_all_providers_have_committed_successfully( - pipeline=pipeline_run2, expected_providers=1 - ) + with mock.patch( + "datahub.ingestion.source.state.stale_entity_removal_handler.StaleEntityRemovalHandler._get_state_obj" + ) as mock_state: + mock_state.return_value = GenericCheckpointState(serde="utf-8") + pipeline_run1 = None + pipeline_run1_config: Dict[str, Dict[str, Dict[str, Any]]] = dict( # type: ignore + base_pipeline_config # type: ignore + ) + pipeline_run1_config["sink"]["config"][ + "filename" + ] = f"{tmp_path}/{output_file_name}" + pipeline_run1 = Pipeline.create(pipeline_run1_config) + pipeline_run1.run() + pipeline_run1.raise_from_status() + pipeline_run1.pretty_print_summary() + + # validate both dummy source mces and checkpoint state mces files + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / output_file_name, + golden_path=f"{test_resources_dir}/{golden_file_name}", + ) + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / state_file_name, + golden_path=f"{test_resources_dir}/{golden_state_file_name}", + ) + checkpoint1 = get_current_checkpoint_from_pipeline(pipeline_run1) + assert checkpoint1 + assert checkpoint1.state - # Perform all assertions on the states. 
The deleted table should not be - # part of the second state - state1 = cast(GenericCheckpointState, checkpoint1.state) - state2 = cast(GenericCheckpointState, checkpoint2.state) + with mock.patch( + "datahub.ingestion.source.state.stale_entity_removal_handler.StaleEntityRemovalHandler._get_state_obj" + ) as mock_state: + mock_state.return_value = GenericCheckpointState(serde="utf-8") + pipeline_run2 = None + pipeline_run2_config: Dict[str, Dict[str, Dict[str, Any]]] = dict(base_pipeline_config) # type: ignore + pipeline_run2_config["source"]["config"]["dataset_patterns"] = { + "allow": ["dummy_dataset1", "dummy_dataset2"], + } + pipeline_run2_config["source"]["config"]["report_failure"] = True + pipeline_run2_config["sink"]["config"][ + "filename" + ] = f"{tmp_path}/{output_file_name_after_deleted}" + pipeline_run2 = Pipeline.create(pipeline_run2_config) + pipeline_run2.run() + pipeline_run2.pretty_print_summary() - difference_dataset_urns = list( - state1.get_urns_not_in(type="dataset", other_checkpoint_state=state2) - ) - # the difference in dataset urns is the dataset which is not allowed to ingest - assert len(difference_dataset_urns) == 1 - deleted_dataset_urns: List[str] = [ - "urn:li:dataset:(urn:li:dataPlatform:postgres,dummy_dataset3,PROD)", - ] - assert sorted(deleted_dataset_urns) == sorted(difference_dataset_urns) + # validate both updated dummy source mces and checkpoint state mces files after deleting dataset + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / output_file_name_after_deleted, + golden_path=f"{test_resources_dir}/{golden_file_name_after_deleted}", + ) + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / state_file_name, + golden_path=f"{test_resources_dir}/{golden_state_file_name_after_deleted}", + ) + checkpoint2 = get_current_checkpoint_from_pipeline(pipeline_run2) + assert checkpoint2 + assert checkpoint2.state + + # Validate that all providers have committed successfully. 
+ validate_all_providers_have_committed_successfully( + pipeline=pipeline_run1, expected_providers=1 + ) + validate_all_providers_have_committed_successfully( + pipeline=pipeline_run2, expected_providers=1 + ) + + # Perform assertions on the states. The deleted table should be + # still part of the second state as pipeline run failed + state1 = cast(GenericCheckpointState, checkpoint1.state) + state2 = cast(GenericCheckpointState, checkpoint2.state) + assert state1 == state2 diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index 426d4dc12f208..c501593fbed01 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -829,7 +829,7 @@ def bigquery_view_2() -> BigqueryView: return BigqueryView( name="table2", created=now, - last_altered=now, + last_altered=None, comment="comment2", view_definition="CREATE VIEW 2", materialized=True, diff --git a/metadata-ingestion/tests/unit/test_glue_source.py b/metadata-ingestion/tests/unit/test_glue_source.py index 8fb840ee003c7..5e721fc5c1293 100644 --- a/metadata-ingestion/tests/unit/test_glue_source.py +++ b/metadata-ingestion/tests/unit/test_glue_source.py @@ -33,11 +33,15 @@ databases_1, databases_2, get_bucket_tagging, + get_databases_delta_response, get_databases_response, get_databases_response_with_resource_link, get_dataflow_graph_response_1, get_dataflow_graph_response_2, + get_delta_tables_response_1, + get_delta_tables_response_2, get_jobs_response, + get_jobs_response_empty, get_object_body_1, get_object_body_2, get_object_response_1, @@ -57,15 +61,21 @@ GMS_SERVER = f"http://localhost:{GMS_PORT}" -def glue_source(platform_instance: Optional[str] = None) -> GlueSource: +def glue_source( + platform_instance: Optional[str] = None, + use_s3_bucket_tags: bool = True, + use_s3_object_tags: bool = True, + extract_delta_schema_from_parameters: bool = False, +) -> GlueSource: return 
GlueSource( ctx=PipelineContext(run_id="glue-source-test"), config=GlueSourceConfig( aws_region="us-west-2", extract_transforms=True, platform_instance=platform_instance, - use_s3_bucket_tags=True, - use_s3_object_tags=True, + use_s3_bucket_tags=use_s3_bucket_tags, + use_s3_object_tags=use_s3_object_tags, + extract_delta_schema_from_parameters=extract_delta_schema_from_parameters, ), ) @@ -336,3 +346,77 @@ def test_glue_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph): "urn:li:dataset:(urn:li:dataPlatform:glue,flights-database.avro,PROD)", "urn:li:container:0b9f1f731ecf6743be6207fec3dc9cba", } + + +def test_glue_with_delta_schema_ingest( + tmp_path: Path, + pytestconfig: PytestConfig, +) -> None: + glue_source_instance = glue_source( + platform_instance="delta_platform_instance", + use_s3_bucket_tags=False, + use_s3_object_tags=False, + extract_delta_schema_from_parameters=True, + ) + + with Stubber(glue_source_instance.glue_client) as glue_stubber: + glue_stubber.add_response("get_databases", get_databases_delta_response, {}) + glue_stubber.add_response( + "get_tables", + get_delta_tables_response_1, + {"DatabaseName": "delta-database"}, + ) + glue_stubber.add_response("get_jobs", get_jobs_response_empty, {}) + + mce_objects = [wu.metadata for wu in glue_source_instance.get_workunits()] + + glue_stubber.assert_no_pending_responses() + + assert glue_source_instance.get_report().num_dataset_valid_delta_schema == 1 + + write_metadata_file(tmp_path / "glue_delta_mces.json", mce_objects) + + # Verify the output. 
+ test_resources_dir = pytestconfig.rootpath / "tests/unit/glue" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "glue_delta_mces.json", + golden_path=test_resources_dir / "glue_delta_mces_golden.json", + ) + + +def test_glue_with_malformed_delta_schema_ingest( + tmp_path: Path, + pytestconfig: PytestConfig, +) -> None: + glue_source_instance = glue_source( + platform_instance="delta_platform_instance", + use_s3_bucket_tags=False, + use_s3_object_tags=False, + extract_delta_schema_from_parameters=True, + ) + + with Stubber(glue_source_instance.glue_client) as glue_stubber: + glue_stubber.add_response("get_databases", get_databases_delta_response, {}) + glue_stubber.add_response( + "get_tables", + get_delta_tables_response_2, + {"DatabaseName": "delta-database"}, + ) + glue_stubber.add_response("get_jobs", get_jobs_response_empty, {}) + + mce_objects = [wu.metadata for wu in glue_source_instance.get_workunits()] + + glue_stubber.assert_no_pending_responses() + + assert glue_source_instance.get_report().num_dataset_invalid_delta_schema == 1 + + write_metadata_file(tmp_path / "glue_malformed_delta_mces.json", mce_objects) + + # Verify the output. 
+ test_resources_dir = pytestconfig.rootpath / "tests/unit/glue" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "glue_malformed_delta_mces.json", + golden_path=test_resources_dir / "glue_malformed_delta_mces_golden.json", + ) diff --git a/metadata-ingestion/tests/unit/test_glue_source_stubs.py b/metadata-ingestion/tests/unit/test_glue_source_stubs.py index 771a0389c1e65..c971001f97072 100644 --- a/metadata-ingestion/tests/unit/test_glue_source_stubs.py +++ b/metadata-ingestion/tests/unit/test_glue_source_stubs.py @@ -271,6 +271,9 @@ }, ] get_tables_response_2 = {"TableList": tables_2} +get_jobs_response_empty: Dict[str, Any] = { + "Jobs": [], +} get_jobs_response = { "Jobs": [ { @@ -787,6 +790,81 @@ job.commit() """ +get_databases_delta_response = { + "DatabaseList": [ + { + "Name": "delta-database", + "CreateTime": datetime.datetime(2021, 6, 9, 14, 14, 19), + "CreateTableDefaultPermissions": [ + { + "Principal": { + "DataLakePrincipalIdentifier": "IAM_ALLOWED_PRINCIPALS" + }, + "Permissions": ["ALL"], + } + ], + "CatalogId": "123412341234", + }, + ] +} +delta_tables_1 = [ + { + "Name": "delta_table_1", + "DatabaseName": "delta-database", + "Owner": "owner", + "CreateTime": datetime.datetime(2021, 6, 9, 14, 17, 35), + "UpdateTime": datetime.datetime(2021, 6, 9, 14, 17, 35), + "LastAccessTime": datetime.datetime(2021, 6, 9, 14, 17, 35), + "Retention": 0, + "StorageDescriptor": { + "Columns": [ + {"Name": "col", "Type": "array", "Comment": "some comment"}, + ], + "Location": "s3://crawler-public-us-west-2/delta/", + }, + "TableType": "EXTERNAL_TABLE", + "Parameters": { + "spark.sql.sources.provider": "delta", + "spark.sql.sources.schema.numParts": "3", + "spark.sql.sources.schema.part.0": 
'{"type":"struct","fields":[{"name":"ecg_session_id","type":"string","nullable":true,"metadata":{}},{"name":"page_type_txt","type":"string","nullable":true,"metadata":{}},{"name":"clsfd_pltfrm_id","type":"integer","nullable":true,"metadata":{}},{"name":"clsfd_dvic_id","type":"integer","nullable":true,"metadata":{}},{"name":"ga_vstr_id","type":"string","nullable":true,"metadata":{}},{"name":"src_ad_id","type":"string","nullable":true,"metadata":{}},{"name":"clsfd_ad_id","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_user_id","type":"long","nullable":true,"metadata":{}},{"name":"src_categ_id","type":"string","nullable":true,"metadata":{}},{"name":"clsfd_categ_ref_id","type":"integer","nullable":true,"metadata":{}},{"name":"clsfd_geo_ref_id","type":"integer","nullable":true,"metadata":{}},{"name":"src_loc_id","type":"string","nullable":true,"metadata":{}},{"name":"geo_region_name","type":"string","nullable":true,"metadata":{}},{"name":"geo_cntry_name","type":"string","nullable":true,"metadata":{}},{"name":"geo_city_name","type":"string","nullable":true,"metadata":{}},{"name":"ga_prfl_id","type":"integer","nullable":true,"metadata":{}},{"name":"clsfd_ga_prfl_name","type":"string","nullable":true,"metadata":{}},{"name":"ga_vst_id","type":"integer","nullable":true,"metadata":{}},{"name":"app_vrsn_txt","type":"string","nullable":true,"metadata":{}},{"name":"chnl_group","type":"string","nullable":true,"metadata":{}},{"name":"clsfd_trffc_chnl_name","type":"string","nullable":true,"metadata":{}},{"name":"vst_mdm_txt","type":"string","nullable":true,"metadata":{}},{"name":"vst_src_txt","type":"string","nullable":true,"metadata":{}},{"name":"vst_src_cmpgn_code","type":"string","nullable":true,"metadata":{}},{"name":"vst_src_cmpgn_txt","type":"string","nullable":true,"metadata":{}},{"name":"vst_src_cntnt_txt","type":"string","nullable":true,"metadata":{}},{"name":"vst_src_ad_kywrd_txt","type":"string","nullable":true,"metadata":{}},{"name":"clsfd_vst_drtn_num","type
":"long","nullable":true,"metadata":{}},{"name":"home_page_txt","type":"string","nullable":true,"metadata":{}},{"name":"session_start_time_num","type":"integer","nullable":true,"metadata":{}},{"name":"brwsr_name","type":"string","nullable":true,"metadata":{}},{"name":"brwsr_vrsn_txt","type":"string","nullable":true,"metadata":{}},{"name":"clsfd_is_user_login_flag","type":"integer","nullable":true,"metadata":{}},{"name":"lang_code","type":"string","nullable":true,"metadata":{}},{"name":"clsfd_is_direct_flag","type":"integer","nullable":true,"metadata":{}},{"name":"os_vrsn_txt","type":"string","nullable":true,"metadata":{}},{"name":"os_name","type":"string","nullable":true,"metadata":{}},{"name":"host_name","type":"string","nullable":true,"metadata":{}},{"name":"vst_src_is_direct_flag","type":"integer","nullable":true,"metadata":{}},{"name":"clsfd_is_session_flag","type":"integer","nullable":true,"metadata":{}},{"name":"app_id","type":"string","nullable":true,"metadata":{}},{"name":"encypted_user_id","type":"string","nullable":true,"metadata":{}},{"name":"decypted_user_id","type":"string","nullable":true,"metadata":{}},{"name":"encrptd_email","type":"string","nullable":true,"metadata":{}},{"name":"page_path_txt","type":"string","nullable":true,"metadata":{}},{"name":"scrn_name","type":"string","nullable":true,"metadata":{}},{"name":"socl_engmnt_type","type":"string","nullable":true,"metadata":{}},{"name":"vst_src_path_txt","type":"string","nullable":true,"metadata":{}},{"name":"adword_user_id","type":"long","nullable":true,"metadata":{}},{"name":"adword_cmpgn_id","type":"long","nullable":true,"metadata":{}},{"name":"adword_adgroup_id","type":"long","nullable":true,"metadata":{}},{"name":"adword_crtv_id","type":"long","nullable":true,"metadata":{}},{"name":"adword_criteria_id","type":"long","nullable":true,"metadata":{}},{"name":"adword_criteria_param_txt","type":"string","nullable":true,"metadata":{}},{"name":"adword_page_num","type":"long","nullable":true,"metadata":
{', + "spark.sql.sources.schema.part.1": '}},{"name":"adword_slot_txt","type":"string","nullable":true,"metadata":{}},{"name":"adword_click_id","type":"string","nullable":true,"metadata":{}},{"name":"adword_ntwrk_type","type":"string","nullable":true,"metadata":{}},{"name":"adword_is_videoad_flag","type":"integer","nullable":true,"metadata":{}},{"name":"adword_criteria_boomuserlist_id","type":"long","nullable":true,"metadata":{}},{"name":"brwsr_size_txt","type":"string","nullable":true,"metadata":{}},{"name":"mbl_dvic_brand_name","type":"string","nullable":true,"metadata":{}},{"name":"mbl_dvic_model_name","type":"string","nullable":true,"metadata":{}},{"name":"mbl_input_slctr_name","type":"string","nullable":true,"metadata":{}},{"name":"mbl_dvic_info_txt","type":"string","nullable":true,"metadata":{}},{"name":"mbl_dvic_mkt_name","type":"string","nullable":true,"metadata":{}},{"name":"flash_vrsn_txt","type":"string","nullable":true,"metadata":{}},{"name":"is_java_enabled_flag","type":"integer","nullable":true,"metadata":{}},{"name":"scrn_color_txt","type":"string","nullable":true,"metadata":{}},{"name":"scrn_rsln_txt","type":"string","nullable":true,"metadata":{}},{"name":"geo_cntint_name","type":"string","nullable":true,"metadata":{}},{"name":"geo_subcntint_name","type":"string","nullable":true,"metadata":{}},{"name":"geo_metro_name","type":"string","nullable":true,"metadata":{}},{"name":"geo_city_id","type":"string","nullable":true,"metadata":{}},{"name":"geo_ntwrk_dmn_name","type":"string","nullable":true,"metadata":{}},{"name":"geo_ltitd","type":"string","nullable":true,"metadata":{}},{"name":"geo_lngtd","type":"string","nullable":true,"metadata":{}},{"name":"geo_ntwrk_loc_name","type":"string","nullable":true,"metadata":{}},{"name":"session_ab_test_group_txt","type":"string","nullable":true,"metadata":{}},{"name":"clsfd_vst_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_new_vstr_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_v
st_num","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_hit_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_pv_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_srp_pv_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_unq_srp_pv_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_vip_pv_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_unq_vip_pv_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_scrn_view_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_uniq_scrn_view_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_scrn_drtn_num","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_bnc_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_trxn_cnt","type":"long","nullable":true,"metadata":{}},{"name":"clsfd_trxn_rev_amt","type":"long","nullable":true,"metadata":{}},{"name":"ga_session_list_array","type":{"type":"array","elementType":{"type":"struct","fields":[{"name":"clsfd_session_id","type":"string","nullable":true,"metadata":{}},{"name":"clsfd_sum_dt","type":"date","nullable":true,"metadata":{}}]},"containsNull":true},"nullable":true,"metadata":{}},{"name":"sess_cd","type":{"type":"map","keyType":"integer","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"lp_hit_cd","type":{"type":"map","keyType":"integer","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"ext_map","type":{"type":"map","keyType":"integer","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"clsfd_cntry_name","type":"string","nullable":true,"metadata":{}},{"name":"cre_date","type":"date","nullable":true,"metadata":{}},{"name":"cre_user","type":"string","nullable":true,"metadata":{}},{"name":"upd_date","type":"date","nullable":true,"metadata":{}},{"name":"upd_user","type":"string","nullable":true,"metadata":{}},{"name":"clsfd_site_id","type":"
integer","nullable":true,"metadata":{}},{"', + "spark.sql.sources.schema.part.2": 'name":"ecg_session_start_dt","type":"date","nullable":true,"metadata":{}}]}', + }, + "CreatedBy": "arn:aws:sts::123412341234:assumed-role/AWSGlueServiceRole-flights-crawler/AWS-Crawler", + "IsRegisteredWithLakeFormation": False, + "CatalogId": "123412341234", + } +] +get_delta_tables_response_1 = {"TableList": delta_tables_1} + +delta_tables_2 = [ + { + "Name": "delta_table_1", + "DatabaseName": "delta-database", + "Owner": "owner", + "CreateTime": datetime.datetime(2021, 6, 9, 14, 17, 35), + "UpdateTime": datetime.datetime(2021, 6, 9, 14, 17, 35), + "LastAccessTime": datetime.datetime(2021, 6, 9, 14, 17, 35), + "Retention": 0, + "StorageDescriptor": { + "Columns": [ + {"Name": "col", "Type": "array", "Comment": "some comment"}, + ], + "Location": "s3://crawler-public-us-west-2/delta/", + }, + "TableType": "EXTERNAL_TABLE", + "Parameters": { + "spark.sql.sources.provider": "delta", + "spark.sql.sources.schema.numParts": "1", + "spark.sql.sources.schema.part.0": "this is totally wrong!", + }, + "CreatedBy": "arn:aws:sts::123412341234:assumed-role/AWSGlueServiceRole-flights-crawler/AWS-Crawler", + "IsRegisteredWithLakeFormation": False, + "CatalogId": "123412341234", + } +] +get_delta_tables_response_2 = {"TableList": delta_tables_2} + def mock_get_object_response(raw_body: str) -> Dict[str, Any]: """ diff --git a/metadata-ingestion/tests/unit/test_metabase_source.py b/metadata-ingestion/tests/unit/test_metabase_source.py index 08c0ddb503664..47974617366f2 100644 --- a/metadata-ingestion/tests/unit/test_metabase_source.py +++ b/metadata-ingestion/tests/unit/test_metabase_source.py @@ -1,12 +1,15 @@ from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.api.source import SourceReport -from datahub.ingestion.source.metabase import MetabaseConfig, MetabaseSource +from datahub.ingestion.source.metabase import ( + MetabaseConfig, + MetabaseReport, + MetabaseSource, 
+) class TestMetabaseSource(MetabaseSource): def __init__(self, ctx: PipelineContext, config: MetabaseConfig): self.config = config - self.report = SourceReport() + self.report = MetabaseReport() def test_get_platform_instance(): diff --git a/metadata-ingestion/tests/unit/test_pipeline.py b/metadata-ingestion/tests/unit/test_pipeline.py index 194a396edb310..bcc0f73a5c967 100644 --- a/metadata-ingestion/tests/unit/test_pipeline.py +++ b/metadata-ingestion/tests/unit/test_pipeline.py @@ -29,7 +29,7 @@ pytestmark = pytest.mark.random_order(disabled=True) -class TestPipeline(object): +class TestPipeline: @patch("datahub.ingestion.source.kafka.KafkaSource.get_workunits", autospec=True) @patch("datahub.ingestion.sink.console.ConsoleSink.close", autospec=True) @freeze_time(FROZEN_TIME) diff --git a/metadata-ingestion/tests/unit/test_snowflake_shares.py b/metadata-ingestion/tests/unit/test_snowflake_shares.py index 9e33ba6132e06..fc753f99b7e8f 100644 --- a/metadata-ingestion/tests/unit/test_snowflake_shares.py +++ b/metadata-ingestion/tests/unit/test_snowflake_shares.py @@ -284,7 +284,7 @@ def test_snowflake_shares_workunit_outbound_share( ] entity_urns.add(wu.get_urn()) - assert len((entity_urns)) == 6 + assert len(entity_urns) == 6 def test_snowflake_shares_workunit_inbound_and_outbound_share( diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index 89d4fcca8801c..a0deae972badb 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -1,5 +1,6 @@ import json import re +from datetime import datetime, timezone from typing import ( Any, Callable, @@ -66,12 +67,18 @@ PatternAddDatasetDomain, SimpleAddDatasetDomain, ) +from datahub.ingestion.transformer.dataset_domain_based_on_tags import ( + DatasetTagDomainMapper, +) from datahub.ingestion.transformer.dataset_transformer import DatasetTransformer from 
datahub.ingestion.transformer.extract_dataset_tags import ExtractDatasetTags from datahub.ingestion.transformer.extract_ownership_from_tags import ( ExtractOwnersFromTagsTransformer, ) from datahub.ingestion.transformer.mark_dataset_status import MarkDatasetStatus +from datahub.ingestion.transformer.pattern_cleanup_dataset_usage_user import ( + PatternCleanupDatasetUsageUser, +) from datahub.ingestion.transformer.pattern_cleanup_ownership import ( PatternCleanUpOwnership, ) @@ -82,6 +89,7 @@ from datahub.metadata.schema_classes import ( BrowsePathsClass, DatasetPropertiesClass, + DatasetUserUsageCountsClass, GlobalTagsClass, MetadataChangeEventClass, OwnershipClass, @@ -3291,3 +3299,355 @@ def test_replace_external_regex_replace_2( output[0].record.aspect.externalUrl == "https://test.com/test/looker-demo/blob/master/foo.view.lkml" ) + + +def test_pattern_cleanup_usage_statistics_user_1( + mock_datahub_graph, +): + pipeline_context: PipelineContext = PipelineContext( + run_id="test_pattern_cleanup_usage_statistics_user" + ) + pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + + TS_1 = datetime(year=2023, month=1, day=1, tzinfo=timezone.utc) + + output = run_dataset_transformer_pipeline( + transformer_type=PatternCleanupDatasetUsageUser, + aspect=models.DatasetUsageStatisticsClass( + timestampMillis=int(TS_1.timestamp() * 1000), + userCounts=[ + DatasetUserUsageCountsClass( + user=builder.make_user_urn("IAM:user1"), + count=1, + userEmail="user1@exaple.com", + ), + DatasetUserUsageCountsClass( + user=builder.make_user_urn("user2"), + count=2, + userEmail="user2@exaple.com", + ), + ], + ), + config={"pattern_for_cleanup": ["IAM:"]}, + pipeline_context=pipeline_context, + ) + + expectedUsageStatistics = models.DatasetUsageStatisticsClass( + timestampMillis=int(TS_1.timestamp() * 1000), + userCounts=[ + DatasetUserUsageCountsClass( + user=builder.make_user_urn("user1"), + count=1, + userEmail="user1@exaple.com", + ), + DatasetUserUsageCountsClass( + 
user=builder.make_user_urn("user2"), + count=2, + userEmail="user2@exaple.com", + ), + ], + ) + + assert len(output) == 2 + assert output[0].record + assert output[0].record.aspect + assert len(output[0].record.aspect.userCounts) == 2 + assert output[0].record.aspect.userCounts == expectedUsageStatistics.userCounts + + +def test_pattern_cleanup_usage_statistics_user_2( + mock_datahub_graph, +): + pipeline_context: PipelineContext = PipelineContext( + run_id="test_pattern_cleanup_usage_statistics_user" + ) + pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + + TS_1 = datetime(year=2023, month=1, day=1, tzinfo=timezone.utc) + + output = run_dataset_transformer_pipeline( + transformer_type=PatternCleanupDatasetUsageUser, + aspect=models.DatasetUsageStatisticsClass( + timestampMillis=int(TS_1.timestamp() * 1000), + userCounts=[ + DatasetUserUsageCountsClass( + user=builder.make_user_urn("test_user_1"), + count=1, + userEmail="user1@exaple.com", + ), + DatasetUserUsageCountsClass( + user=builder.make_user_urn("test_user_2"), + count=2, + userEmail="user2@exaple.com", + ), + ], + ), + config={"pattern_for_cleanup": ["_user"]}, + pipeline_context=pipeline_context, + ) + + expectedUsageStatistics = models.DatasetUsageStatisticsClass( + timestampMillis=int(TS_1.timestamp() * 1000), + userCounts=[ + DatasetUserUsageCountsClass( + user=builder.make_user_urn("test_1"), + count=1, + userEmail="user1@exaple.com", + ), + DatasetUserUsageCountsClass( + user=builder.make_user_urn("test_2"), + count=2, + userEmail="user2@exaple.com", + ), + ], + ) + + assert len(output) == 2 + assert output[0].record + assert output[0].record.aspect + assert len(output[0].record.aspect.userCounts) == 2 + assert output[0].record.aspect.userCounts == expectedUsageStatistics.userCounts + + +def test_pattern_cleanup_usage_statistics_user_3( + mock_datahub_graph, +): + pipeline_context: PipelineContext = PipelineContext( + run_id="test_pattern_cleanup_usage_statistics_user" + ) + 
pipeline_context.graph = mock_datahub_graph(DatahubClientConfig) + + TS_1 = datetime(year=2023, month=1, day=1, tzinfo=timezone.utc) + + output = run_dataset_transformer_pipeline( + transformer_type=PatternCleanupDatasetUsageUser, + aspect=models.DatasetUsageStatisticsClass( + timestampMillis=int(TS_1.timestamp() * 1000), + userCounts=[ + DatasetUserUsageCountsClass( + user=builder.make_user_urn("abc_user_1"), + count=1, + userEmail="user1@exaple.com", + ), + DatasetUserUsageCountsClass( + user=builder.make_user_urn("xyz_user_2"), + count=2, + userEmail="user2@exaple.com", + ), + ], + ), + config={"pattern_for_cleanup": [r"_user_\d+"]}, + pipeline_context=pipeline_context, + ) + + expectedUsageStatistics = models.DatasetUsageStatisticsClass( + timestampMillis=int(TS_1.timestamp() * 1000), + userCounts=[ + DatasetUserUsageCountsClass( + user=builder.make_user_urn("abc"), + count=1, + userEmail="user1@exaple.com", + ), + DatasetUserUsageCountsClass( + user=builder.make_user_urn("xyz"), + count=2, + userEmail="user2@exaple.com", + ), + ], + ) + + assert len(output) == 2 + assert output[0].record + assert output[0].record.aspect + assert len(output[0].record.aspect.userCounts) == 2 + assert output[0].record.aspect.userCounts == expectedUsageStatistics.userCounts + + +def test_domain_mapping_based_on_tags_with_valid_tags(mock_datahub_graph): + acryl_domain = builder.make_domain_urn("acryl.io") + server_domain = builder.make_domain_urn("test.io") + + tag_one = builder.make_tag_urn("test:tag_1") + + # Return fake aspect to simulate server behaviour + def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: + return models.GlobalTagsClass(tags=[TagAssociationClass(tag=tag_one)]) + + pipeline_context = PipelineContext(run_id="transformer_pipe_line") + pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + + pipeline_context.graph.get_tags = fake_get_tags # type: ignore + + output = run_dataset_transformer_pipeline( + 
transformer_type=DatasetTagDomainMapper, + aspect=models.DomainsClass(domains=[server_domain]), + config={"domain_mapping": {"test:tag_1": acryl_domain}}, + pipeline_context=pipeline_context, + ) + + assert len(output) == 2 + assert output[0] is not None + assert output[0].record is not None + assert isinstance(output[0].record, MetadataChangeProposalWrapper) + assert output[0].record.aspect is not None + assert isinstance(output[0].record.aspect, models.DomainsClass) + transformed_aspect = cast(models.DomainsClass, output[0].record.aspect) + assert len(transformed_aspect.domains) == 1 + assert acryl_domain in transformed_aspect.domains + assert server_domain not in transformed_aspect.domains + + +def test_domain_mapping_based_on_tags_with_no_matching_tags(mock_datahub_graph): + acryl_domain = builder.make_domain_urn("acryl.io") + server_domain = builder.make_domain_urn("test.io") + non_matching_tag = builder.make_tag_urn("nonMatching") + + pipeline_context = PipelineContext(run_id="no_match_pipeline") + pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + + # Return fake aspect to simulate server behaviour + def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: + return models.GlobalTagsClass(tags=[TagAssociationClass(tag=non_matching_tag)]) + + pipeline_context.graph.get_tags = fake_get_tags # type: ignore + + output = run_dataset_transformer_pipeline( + transformer_type=DatasetTagDomainMapper, + aspect=models.DomainsClass(domains=[server_domain]), + config={ + "domain_mapping": {"test:tag_1": acryl_domain}, + }, + pipeline_context=pipeline_context, + ) + assert len(output) == 2 + assert isinstance(output[0].record.aspect, models.DomainsClass) + assert len(output[0].record.aspect.domains) == 1 + transformed_aspect = cast(models.DomainsClass, output[0].record.aspect) + assert len(transformed_aspect.domains) == 1 + assert acryl_domain not in transformed_aspect.domains + assert server_domain in transformed_aspect.domains + + +def 
test_domain_mapping_based_on_tags_with_empty_config(mock_datahub_graph): + some_tag = builder.make_tag_urn("someTag") + + pipeline_context = PipelineContext(run_id="empty_config_pipeline") + pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + + # Return fake aspect to simulate server behaviour + def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: + return models.GlobalTagsClass(tags=[TagAssociationClass(tag=some_tag)]) + + pipeline_context.graph.get_tags = fake_get_tags # type: ignore + + output = run_dataset_transformer_pipeline( + transformer_type=DatasetTagDomainMapper, + aspect=models.DomainsClass(domains=[]), + config={"domain_mapping": {}}, + pipeline_context=pipeline_context, + ) + assert len(output) == 2 + assert isinstance(output[0].record.aspect, models.DomainsClass) + assert len(output[0].record.aspect.domains) == 0 + + +def test_domain_mapping_based__r_on_tags_with_multiple_tags(mock_datahub_graph): + # Two tags that match different rules in the domain mapping configuration + tag_one = builder.make_tag_urn("test:tag_1") + tag_two = builder.make_tag_urn("test:tag_2") + existing_domain = builder.make_domain_urn("existing.io") + finance = builder.make_domain_urn("finance") + hr = builder.make_domain_urn("hr") + + pipeline_context = PipelineContext(run_id="multiple_matches_pipeline") + pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + + # Return fake aspect to simulate server behaviour + def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: + return models.GlobalTagsClass( + tags=[TagAssociationClass(tag=tag_one), TagAssociationClass(tag=tag_two)] + ) + + # Return fake aspect to simulate server behaviour + def fake_get_domain(entity_urn: str) -> models.DomainsClass: + return models.DomainsClass(domains=[existing_domain]) + + pipeline_context.graph.get_tags = fake_get_tags # type: ignore + pipeline_context.graph.get_domain = fake_get_domain # type: ignore + + output = run_dataset_transformer_pipeline( + 
transformer_type=DatasetTagDomainMapper, + aspect=models.DomainsClass(domains=[existing_domain]), + config={ + "domain_mapping": {"test:tag_1": finance, "test:tag_2": hr}, + "semantics": "PATCH", + }, + pipeline_context=pipeline_context, + ) + + # Assertions to verify the expected outcome + assert len(output) == 2 + assert output[0].record is not None + assert output[0].record.aspect is not None + assert isinstance(output[0].record.aspect, models.DomainsClass) + transformed_aspect = cast(models.DomainsClass, output[0].record.aspect) + + # Expecting domains from both matched tags + assert set(output[0].record.aspect.domains) == {existing_domain, finance, hr} + assert len(transformed_aspect.domains) == 3 + + +def test_domain_mapping_based_on_tags_with_empty_tags(mock_datahub_graph): + acryl_domain = builder.make_domain_urn("acryl.io") + server_domain = builder.make_domain_urn("test.io") + pipeline_context = PipelineContext(run_id="empty_config_pipeline") + pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + + # Return fake aspect to simulate server behaviour + def fake_get_tags(entity_urn: str) -> models.GlobalTagsClass: + return models.GlobalTagsClass(tags=[]) + + pipeline_context.graph.get_tags = fake_get_tags # type: ignore + + output = run_dataset_transformer_pipeline( + transformer_type=DatasetTagDomainMapper, + aspect=models.DomainsClass(domains=[acryl_domain]), + config={"domain_mapping": {"test:tag_1": server_domain}}, + pipeline_context=pipeline_context, + ) + + assert len(output) == 2 + assert isinstance(output[0].record.aspect, models.DomainsClass) + assert len(output[0].record.aspect.domains) == 1 + transformed_aspect = cast(models.DomainsClass, output[0].record.aspect) + assert len(transformed_aspect.domains) == 1 + assert acryl_domain in transformed_aspect.domains + assert server_domain not in transformed_aspect.domains + + +def test_domain_mapping_based_on_tags_with_no_tags(mock_datahub_graph): + acryl_domain = 
builder.make_domain_urn("acryl.io") + server_domain = builder.make_domain_urn("test.io") + pipeline_context = PipelineContext(run_id="empty_config_pipeline") + pipeline_context.graph = mock_datahub_graph(DatahubClientConfig()) + + # Return fake aspect to simulate server behaviour + def fake_get_tags(entity_urn: str) -> Optional[models.GlobalTagsClass]: + return None + + pipeline_context.graph.get_tags = fake_get_tags # type: ignore + + output = run_dataset_transformer_pipeline( + transformer_type=DatasetTagDomainMapper, + aspect=models.DomainsClass(domains=[acryl_domain]), + config={"domain_mapping": {"test:tag_1": server_domain}}, + pipeline_context=pipeline_context, + ) + + assert len(output) == 2 + assert isinstance(output[0].record.aspect, models.DomainsClass) + assert len(output[0].record.aspect.domains) == 1 + transformed_aspect = cast(models.DomainsClass, output[0].record.aspect) + assert len(transformed_aspect.domains) == 1 + assert acryl_domain in transformed_aspect.domains + assert server_domain not in transformed_aspect.domains diff --git a/metadata-ingestion/tests/unit/utilities/test_advanced_thread_executor.py b/metadata-ingestion/tests/unit/utilities/test_advanced_thread_executor.py index ae4616c604a61..7b51c18a85c5f 100644 --- a/metadata-ingestion/tests/unit/utilities/test_advanced_thread_executor.py +++ b/metadata-ingestion/tests/unit/utilities/test_advanced_thread_executor.py @@ -77,12 +77,12 @@ def test_backpressure_aware_executor_simple(): def task(i): return i - assert set( + assert { res.result() for res in BackpressureAwareExecutor.map( task, ((i,) for i in range(10)), max_workers=2 ) - ) == set(range(10)) + } == set(range(10)) def test_backpressure_aware_executor_advanced(): @@ -119,7 +119,7 @@ def task(x, y): assert 2 <= len(executed) <= 4 # Finally, consume the rest of the results. 
- assert set(r.result() for r in results) == { + assert {r.result() for r in results} == { i for i in range(10) if i != first_result.result() } diff --git a/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py b/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py index 0384e1f918881..bc915e21389a7 100644 --- a/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py +++ b/metadata-ingestion/tests/unit/utilities/test_ratelimiter.py @@ -8,7 +8,7 @@ def test_rate_is_limited(): MAX_CALLS_PER_SEC = 5 TOTAL_CALLS = 18 - actual_calls: Dict[float, int] = defaultdict(lambda: 0) + actual_calls: Dict[float, int] = defaultdict(int) ratelimiter = RateLimiter(max_calls=MAX_CALLS_PER_SEC, period=1) for _ in range(TOTAL_CALLS): diff --git a/metadata-integration/java/custom-plugin-lib/build.gradle b/metadata-integration/java/custom-plugin-lib/build.gradle new file mode 100644 index 0000000000000..9fbe1066706be --- /dev/null +++ b/metadata-integration/java/custom-plugin-lib/build.gradle @@ -0,0 +1,116 @@ +plugins { + id 'java-library' + id 'com.github.johnrengelman.shadow' + id 'signing' + id 'io.codearte.nexus-staging' + id 'maven-publish' +} + +apply from: "../versioning.gradle" + +jar.enabled = false // Since we only want to build shadow jars, disabling the regular jar creation + +// only include since required registry file +processResources { + from("${project(':metadata-models').projectDir}/src/main/resources/entity-registry.yml") +} + +dependencies { + // Required for custom code plugins + api(project(':entity-registry')) { + // only include dataTemplate (and resources/entity-registry.yml from above) + exclude module: 'metadata-models' + } + implementation project(path: ':metadata-models', configuration: 'dataTemplate') + + // Required for MCL/MCP hooks + implementation(project(':metadata-io:metadata-io-api')) { + transitive = false + } + + // utility classes + implementation(project(':metadata-utils')) { + transitive = false + } +} + +configurations.all { + 
exclude group: 'org.antlr' +} + +shadowJar { + zip64 = true + archiveClassifier = '' + // preventing java multi-release JAR leakage + // https://github.com/johnrengelman/shadow/issues/729 + exclude('module-info.class', 'META-INF/versions/**', + '**/LICENSE', '**/LICENSE*.txt', '**/NOTICE', '**/NOTICE.txt', 'licenses/**', 'log4j2.*', 'log4j.*') + relocate 'com.fasterxml.jackson', 'datahub.shaded.jackson' + mergeServiceFiles() +} + +publishing { + publications { + shadow(MavenPublication) { publication -> + project.shadow.component(publication) + pom { + name = 'DataHub Custom Plugin Dependency' + group = 'io.acryl' + artifactId = 'datahub-custom-plugin-lib' + description = 'DataHub Java Custom Plugin dependencies' + url = 'https://datahubproject.io' + artifacts = [shadowJar] + + scm { + connection = 'scm:git:git://github.com/datahub-project/datahub.git' + developerConnection = 'scm:git:ssh://github.com:datahub-project/datahub.git' + url = 'https://github.com/datahub-project/datahub.git' + } + + licenses { + license { + name = 'The Apache License, Version 2.0' + url = 'http://www.apache.org/licenses/LICENSE-2.0.txt' + } + } + + developers { + developer { + id = 'datahub' + name = 'Datahub' + email = 'datahub@acryl.io' + } + } + } + } + } + + repositories { +/* maven { + def releasesRepoUrl = "https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/" + def snapshotsRepoUrl = "https://s01.oss.sonatype.org/content/repositories/snapshots/" + def ossrhUsername = System.getenv('RELEASE_USERNAME') + def ossrhPassword = System.getenv('RELEASE_PASSWORD') + credentials { + username ossrhUsername + password ossrhPassword + } + url = version.endsWith('SNAPSHOT') ? 
snapshotsRepoUrl : releasesRepoUrl + }*/ + } +} + +signing { + required { gradle.taskGraph.hasTask("publish") } + def signingKey = findProperty("signingKey") + def signingPassword = System.getenv("SIGNING_PASSWORD") + useInMemoryPgpKeys(signingKey, signingPassword) + sign publishing.publications.shadow +} + +nexusStaging { + serverUrl = "https://s01.oss.sonatype.org/service/local/" + //required only for projects registered in Sonatype after 2021-02-24 + username = System.getenv("NEXUS_USERNAME") + password = System.getenv("NEXUS_PASSWORD") +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-client/build.gradle b/metadata-integration/java/datahub-client/build.gradle index eee84b1f8c827..53c2a33828907 100644 --- a/metadata-integration/java/datahub-client/build.gradle +++ b/metadata-integration/java/datahub-client/build.gradle @@ -27,7 +27,7 @@ dependencies { } } - compileOnly externalDependency.httpAsyncClient + compileOnly externalDependency.httpClient implementation externalDependency.jacksonDataBind runtimeOnly externalDependency.jna @@ -41,7 +41,7 @@ dependencies { testImplementation externalDependency.mockServer testImplementation externalDependency.mockServerClient testImplementation externalDependency.testContainers - testImplementation externalDependency.httpAsyncClient + testImplementation externalDependency.httpClient testRuntimeOnly externalDependency.logbackClassic } @@ -118,7 +118,8 @@ shadowJar { relocate 'ch.randelshofer', 'datahub.shaded.ch.randelshofer' relocate 'io.github.classgraph', 'datahub.shaded.io.github.classgraph' relocate 'nonapi.io.github.classgraph', 'datahub.shaded.nonapi.io.github.classgraph' - relocate 'com.github.fge', 'datahub.shaded.com.github.fge' + relocate 'org.eclipse.parsson', 'datahub.shaded.parsson' + relocate 'jakarta.json', 'datahub.shaded.json' finalizedBy checkShadowJar } diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/MetadataResponseFuture.java 
b/metadata-integration/java/datahub-client/src/main/java/datahub/client/MetadataResponseFuture.java index 89db9738efda6..11be10186f1ef 100644 --- a/metadata-integration/java/datahub-client/src/main/java/datahub/client/MetadataResponseFuture.java +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/MetadataResponseFuture.java @@ -7,16 +7,16 @@ import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicReference; import lombok.SneakyThrows; -import org.apache.http.HttpResponse; +import org.apache.hc.client5.http.async.methods.SimpleHttpResponse; public class MetadataResponseFuture implements Future { - private final Future requestFuture; + private final Future requestFuture; private final AtomicReference responseReference; private final CountDownLatch responseLatch; private final ResponseMapper mapper; public MetadataResponseFuture( - Future underlyingFuture, + Future underlyingFuture, AtomicReference responseAtomicReference, CountDownLatch responseLatch) { this.requestFuture = underlyingFuture; @@ -25,7 +25,8 @@ public MetadataResponseFuture( this.mapper = null; } - public MetadataResponseFuture(Future underlyingFuture, ResponseMapper mapper) { + public MetadataResponseFuture( + Future underlyingFuture, ResponseMapper mapper) { this.requestFuture = underlyingFuture; this.responseReference = null; this.responseLatch = null; @@ -50,7 +51,7 @@ public boolean isDone() { @SneakyThrows @Override public MetadataWriteResponse get() throws InterruptedException, ExecutionException { - HttpResponse response = requestFuture.get(); + SimpleHttpResponse response = requestFuture.get(); if (mapper != null) { return mapper.map(response); } else { @@ -63,7 +64,7 @@ public MetadataWriteResponse get() throws InterruptedException, ExecutionExcepti @Override public MetadataWriteResponse get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { - HttpResponse response = requestFuture.get(timeout, 
unit); + SimpleHttpResponse response = requestFuture.get(timeout, unit); if (mapper != null) { return mapper.map(response); } else { @@ -75,6 +76,6 @@ public MetadataWriteResponse get(long timeout, TimeUnit unit) @FunctionalInterface public interface ResponseMapper { - MetadataWriteResponse map(HttpResponse httpResponse); + MetadataWriteResponse map(SimpleHttpResponse httpResponse); } } diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/DatahubHttpRequestRetryStrategy.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/DatahubHttpRequestRetryStrategy.java new file mode 100644 index 0000000000000..71a4b93baf48f --- /dev/null +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/DatahubHttpRequestRetryStrategy.java @@ -0,0 +1,54 @@ +package datahub.client.rest; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.net.ConnectException; +import java.net.NoRouteToHostException; +import java.net.UnknownHostException; +import java.util.Arrays; +import javax.net.ssl.SSLException; +import lombok.extern.slf4j.Slf4j; +import org.apache.hc.client5.http.impl.DefaultHttpRequestRetryStrategy; +import org.apache.hc.core5.http.ConnectionClosedException; +import org.apache.hc.core5.http.HttpRequest; +import org.apache.hc.core5.http.HttpResponse; +import org.apache.hc.core5.http.HttpStatus; +import org.apache.hc.core5.http.protocol.HttpContext; +import org.apache.hc.core5.util.TimeValue; + +@Slf4j +public class DatahubHttpRequestRetryStrategy extends DefaultHttpRequestRetryStrategy { + public DatahubHttpRequestRetryStrategy() { + this(1, TimeValue.ofSeconds(10)); + } + + public DatahubHttpRequestRetryStrategy(int maxRetries, TimeValue retryInterval) { + super( + maxRetries, + retryInterval, + Arrays.asList( + InterruptedIOException.class, + UnknownHostException.class, + ConnectException.class, + ConnectionClosedException.class, + NoRouteToHostException.class, + 
SSLException.class), + Arrays.asList( + HttpStatus.SC_TOO_MANY_REQUESTS, + HttpStatus.SC_SERVICE_UNAVAILABLE, + HttpStatus.SC_INTERNAL_SERVER_ERROR)); + } + + @Override + public boolean retryRequest( + HttpRequest request, IOException exception, int execCount, HttpContext context) { + log.warn("Checking if retry is needed: {}", execCount); + return super.retryRequest(request, exception, execCount, context); + } + + @Override + public boolean retryRequest(HttpResponse response, int execCount, HttpContext context) { + log.warn("Retrying request due to error: {}", response); + return super.retryRequest(response, execCount, context); + } +} diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitter.java b/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitter.java index a2692c432513e..ed4cee060bd69 100644 --- a/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitter.java +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitter.java @@ -18,31 +18,35 @@ import datahub.event.UpsertAspectRequest; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; import java.security.KeyManagementException; import java.security.KeyStoreException; import java.security.NoSuchAlgorithmException; import java.util.List; +import java.util.Objects; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import javax.annotation.concurrent.ThreadSafe; +import javax.net.ssl.SSLContext; import lombok.extern.slf4j.Slf4j; -import org.apache.http.HttpResponse; -import org.apache.http.HttpStatus; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpPost; -import 
org.apache.http.concurrent.FutureCallback; -import org.apache.http.conn.ssl.NoopHostnameVerifier; -import org.apache.http.conn.ssl.TrustAllStrategy; -import org.apache.http.entity.StringEntity; -import org.apache.http.impl.nio.client.CloseableHttpAsyncClient; -import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; -import org.apache.http.nio.client.HttpAsyncClient; -import org.apache.http.ssl.SSLContextBuilder; +import org.apache.hc.client5.http.async.methods.SimpleHttpRequest; +import org.apache.hc.client5.http.async.methods.SimpleHttpResponse; +import org.apache.hc.client5.http.async.methods.SimpleRequestBuilder; +import org.apache.hc.client5.http.config.RequestConfig; +import org.apache.hc.client5.http.impl.async.CloseableHttpAsyncClient; +import org.apache.hc.client5.http.impl.async.HttpAsyncClientBuilder; +import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManagerBuilder; +import org.apache.hc.client5.http.ssl.ClientTlsStrategyBuilder; +import org.apache.hc.client5.http.ssl.NoopHostnameVerifier; +import org.apache.hc.client5.http.ssl.TrustAllStrategy; +import org.apache.hc.core5.concurrent.FutureCallback; +import org.apache.hc.core5.http.ContentType; +import org.apache.hc.core5.http.HttpStatus; +import org.apache.hc.core5.http.nio.ssl.TlsStrategy; +import org.apache.hc.core5.ssl.SSLContexts; +import org.apache.hc.core5.util.TimeValue; @ThreadSafe @Slf4j @@ -89,28 +93,43 @@ public RestEmitter(RestEmitterConfig config) { dataTemplateCodec = new JacksonDataTemplateCodec(objectMapper.getFactory()); this.config = config; + HttpAsyncClientBuilder httpClientBuilder = this.config.getAsyncHttpClientBuilder(); + httpClientBuilder.setRetryStrategy(new DatahubHttpRequestRetryStrategy()); + // Override httpClient settings with RestEmitter configs if present if (config.getTimeoutSec() != null) { - HttpAsyncClientBuilder httpClientBuilder = this.config.getAsyncHttpClientBuilder(); httpClientBuilder.setDefaultRequestConfig( RequestConfig.custom() - 
.setConnectTimeout(config.getTimeoutSec() * 1000) - .setSocketTimeout(config.getTimeoutSec() * 1000) + .setConnectionRequestTimeout( + config.getTimeoutSec() * 1000, java.util.concurrent.TimeUnit.MILLISECONDS) + .setResponseTimeout( + config.getTimeoutSec() * 1000, java.util.concurrent.TimeUnit.MILLISECONDS) .build()); } if (config.isDisableSslVerification()) { - HttpAsyncClientBuilder httpClientBuilder = this.config.getAsyncHttpClientBuilder(); try { - httpClientBuilder - .setSSLContext( - new SSLContextBuilder().loadTrustMaterial(null, TrustAllStrategy.INSTANCE).build()) - .setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE); + SSLContext sslcontext = + SSLContexts.custom().loadTrustMaterial(TrustAllStrategy.INSTANCE).build(); + TlsStrategy tlsStrategy = + ClientTlsStrategyBuilder.create() + .setSslContext(sslcontext) + .setHostnameVerifier(NoopHostnameVerifier.INSTANCE) + .build(); + + httpClientBuilder.setConnectionManager( + PoolingAsyncClientConnectionManagerBuilder.create() + .setTlsStrategy(tlsStrategy) + .build()); } catch (KeyManagementException | NoSuchAlgorithmException | KeyStoreException e) { throw new RuntimeException("Error while creating insecure http client", e); } } - this.httpClient = this.config.getAsyncHttpClientBuilder().build(); + httpClientBuilder.setRetryStrategy( + new DatahubHttpRequestRetryStrategy( + config.getMaxRetries(), TimeValue.ofSeconds(config.getRetryIntervalSec()))); + + this.httpClient = httpClientBuilder.build(); this.httpClient.start(); this.ingestProposalUrl = this.config.getServer() + "/aspects?action=ingestProposal"; this.ingestOpenApiUrl = config.getServer() + "/openapi/entities/v1/"; @@ -118,13 +137,11 @@ public RestEmitter(RestEmitterConfig config) { this.eventFormatter = this.config.getEventFormatter(); } - private static MetadataWriteResponse mapResponse(HttpResponse response) { + private static MetadataWriteResponse mapResponse(SimpleHttpResponse response) { MetadataWriteResponse.MetadataWriteResponseBuilder 
builder = MetadataWriteResponse.builder().underlyingResponse(response); - if ((response != null) - && (response.getStatusLine() != null) - && (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK - || response.getStatusLine().getStatusCode() == HttpStatus.SC_CREATED)) { + if ((response != null) && (response.getCode()) == HttpStatus.SC_OK + || Objects.requireNonNull(response).getCode() == HttpStatus.SC_CREATED) { builder.success(true); } else { builder.success(false); @@ -132,14 +149,7 @@ private static MetadataWriteResponse mapResponse(HttpResponse response) { // Read response content try { ByteArrayOutputStream result = new ByteArrayOutputStream(); - InputStream contentStream = response.getEntity().getContent(); - byte[] buffer = new byte[1024]; - int length = contentStream.read(buffer); - while (length > 0) { - result.write(buffer, 0, length); - length = contentStream.read(buffer); - } - builder.responseContent(result.toString("UTF-8")); + builder.responseContent(response.getBody().getBodyText()); } catch (Exception e) { // Catch all exceptions and still return a valid response object log.warn("Wasn't able to convert response into a string", e); @@ -198,21 +208,22 @@ public Future emit(MetadataChangeProposal mcp, Callback c private Future postGeneric( String urlStr, String payloadJson, Object originalRequest, Callback callback) throws IOException { - HttpPost httpPost = new HttpPost(urlStr); - httpPost.setHeader("Content-Type", "application/json"); - httpPost.setHeader("X-RestLi-Protocol-Version", "2.0.0"); - httpPost.setHeader("Accept", "application/json"); - this.config.getExtraHeaders().forEach((k, v) -> httpPost.setHeader(k, v)); + SimpleRequestBuilder simpleRequestBuilder = SimpleRequestBuilder.post(urlStr); + simpleRequestBuilder.setHeader("Content-Type", "application/json"); + simpleRequestBuilder.setHeader("X-RestLi-Protocol-Version", "2.0.0"); + simpleRequestBuilder.setHeader("Accept", "application/json"); + 
this.config.getExtraHeaders().forEach(simpleRequestBuilder::setHeader); if (this.config.getToken() != null) { - httpPost.setHeader("Authorization", "Bearer " + this.config.getToken()); + simpleRequestBuilder.setHeader("Authorization", "Bearer " + this.config.getToken()); } - httpPost.setEntity(new StringEntity(payloadJson)); + + simpleRequestBuilder.setBody(payloadJson, ContentType.APPLICATION_JSON); AtomicReference responseAtomicReference = new AtomicReference<>(); CountDownLatch responseLatch = new CountDownLatch(1); - FutureCallback httpCallback = - new FutureCallback() { + FutureCallback httpCallback = + new FutureCallback() { @Override - public void completed(HttpResponse response) { + public void completed(SimpleHttpResponse response) { MetadataWriteResponse writeResponse = null; try { writeResponse = mapResponse(response); @@ -252,16 +263,20 @@ public void cancelled() { } } }; - Future requestFuture = httpClient.execute(httpPost, httpCallback); + Future requestFuture = + httpClient.execute(simpleRequestBuilder.build(), httpCallback); return new MetadataResponseFuture(requestFuture, responseAtomicReference, responseLatch); } private Future getGeneric(String urlStr) throws IOException { - HttpGet httpGet = new HttpGet(urlStr); - httpGet.setHeader("Content-Type", "application/json"); - httpGet.setHeader("X-RestLi-Protocol-Version", "2.0.0"); - httpGet.setHeader("Accept", "application/json"); - Future response = this.httpClient.execute(httpGet, null); + SimpleHttpRequest simpleHttpRequest = + SimpleRequestBuilder.get(urlStr) + .addHeader("Content-Type", "application/json") + .addHeader("X-RestLi-Protocol-Version", "2.0.0") + .addHeader("Accept", "application/json") + .build(); + + Future response = this.httpClient.execute(simpleHttpRequest, null); return new MetadataResponseFuture(response, RestEmitter::mapResponse); } @@ -284,20 +299,25 @@ public Future emit(List request, Cal private Future postOpenAPI( List payload, Callback callback) throws IOException { - 
HttpPost httpPost = new HttpPost(ingestOpenApiUrl); - httpPost.setHeader("Content-Type", "application/json"); - httpPost.setHeader("Accept", "application/json"); - this.config.getExtraHeaders().forEach((k, v) -> httpPost.setHeader(k, v)); + SimpleRequestBuilder simpleRequestBuilder = + SimpleRequestBuilder.post(ingestOpenApiUrl) + .addHeader("Content-Type", "application/json") + .addHeader("Accept", "application/json") + .addHeader("X-RestLi-Protocol-Version", "2.0.0"); + + this.config.getExtraHeaders().forEach(simpleRequestBuilder::addHeader); + if (this.config.getToken() != null) { - httpPost.setHeader("Authorization", "Bearer " + this.config.getToken()); + simpleRequestBuilder.addHeader("Authorization", "Bearer " + this.config.getToken()); } - httpPost.setEntity(new StringEntity(objectMapper.writeValueAsString(payload))); + simpleRequestBuilder.setBody( + objectMapper.writeValueAsString(payload), ContentType.APPLICATION_JSON); AtomicReference responseAtomicReference = new AtomicReference<>(); CountDownLatch responseLatch = new CountDownLatch(1); - FutureCallback httpCallback = - new FutureCallback() { + FutureCallback httpCallback = + new FutureCallback() { @Override - public void completed(HttpResponse response) { + public void completed(SimpleHttpResponse response) { MetadataWriteResponse writeResponse = null; try { writeResponse = mapResponse(response); @@ -337,12 +357,13 @@ public void cancelled() { } } }; - Future requestFuture = httpClient.execute(httpPost, httpCallback); + Future requestFuture = + httpClient.execute(simpleRequestBuilder.build(), httpCallback); return new MetadataResponseFuture(requestFuture, responseAtomicReference, responseLatch); } @VisibleForTesting - HttpAsyncClient getHttpClient() { + CloseableHttpAsyncClient getHttpClient() { return this.httpClient; } } diff --git a/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitterConfig.java 
b/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitterConfig.java index 7e24429213246..e28ad4ed660f0 100644 --- a/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitterConfig.java +++ b/metadata-integration/java/datahub-client/src/main/java/datahub/client/rest/RestEmitterConfig.java @@ -10,8 +10,10 @@ import lombok.NonNull; import lombok.Value; import lombok.extern.slf4j.Slf4j; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; +import org.apache.hc.client5.http.config.RequestConfig; +import org.apache.hc.client5.http.impl.async.HttpAsyncClientBuilder; +import org.apache.hc.client5.http.impl.async.HttpAsyncClients; +import org.apache.hc.core5.util.TimeValue; @Value @Builder @@ -23,20 +25,23 @@ public class RestEmitterConfig { public static final String DEFAULT_AUTH_TOKEN = null; public static final String CLIENT_VERSION_PROPERTY = "clientVersion"; - @Builder.Default private final String server = "http://localhost:8080"; + @Builder.Default String server = "http://localhost:8080"; - private final Integer timeoutSec; - @Builder.Default private final boolean disableSslVerification = false; + Integer timeoutSec; + @Builder.Default boolean disableSslVerification = false; - @Builder.Default private final String token = DEFAULT_AUTH_TOKEN; + @Builder.Default int maxRetries = 0; - @Builder.Default @NonNull private final Map extraHeaders = Collections.EMPTY_MAP; + @Builder.Default int retryIntervalSec = 10; - private final HttpAsyncClientBuilder asyncHttpClientBuilder; + @Builder.Default String token = DEFAULT_AUTH_TOKEN; + + @Builder.Default @NonNull Map extraHeaders = Collections.EMPTY_MAP; @Builder.Default - private final EventFormatter eventFormatter = - new EventFormatter(EventFormatter.Format.PEGASUS_JSON); + EventFormatter eventFormatter = new EventFormatter(EventFormatter.Format.PEGASUS_JSON); + + HttpAsyncClientBuilder 
asyncHttpClientBuilder; public static class RestEmitterConfigBuilder { @@ -53,13 +58,19 @@ private String getVersion() { } private HttpAsyncClientBuilder asyncHttpClientBuilder = - HttpAsyncClientBuilder.create() + HttpAsyncClients.custom() + .setUserAgent("DataHub-RestClient/" + getVersion()) .setDefaultRequestConfig( RequestConfig.custom() - .setConnectTimeout(DEFAULT_CONNECT_TIMEOUT_SEC * 1000) - .setSocketTimeout(DEFAULT_READ_TIMEOUT_SEC * 1000) + .setConnectionRequestTimeout( + DEFAULT_CONNECT_TIMEOUT_SEC * 1000, + java.util.concurrent.TimeUnit.MILLISECONDS) + .setResponseTimeout( + DEFAULT_READ_TIMEOUT_SEC * 1000, java.util.concurrent.TimeUnit.MILLISECONDS) .build()) - .setUserAgent("DataHub-RestClient/" + getVersion()); + .setRetryStrategy( + new DatahubHttpRequestRetryStrategy( + maxRetries$value, TimeValue.ofSeconds(retryIntervalSec$value))); public RestEmitterConfigBuilder with(Consumer builderFunction) { builderFunction.accept(this); diff --git a/metadata-integration/java/datahub-client/src/test/java/datahub/client/rest/RestEmitterTest.java b/metadata-integration/java/datahub-client/src/test/java/datahub/client/rest/RestEmitterTest.java index 657669d19439c..a22b2736e750d 100644 --- a/metadata-integration/java/datahub-client/src/test/java/datahub/client/rest/RestEmitterTest.java +++ b/metadata-integration/java/datahub-client/src/test/java/datahub/client/rest/RestEmitterTest.java @@ -11,8 +11,8 @@ import datahub.event.MetadataChangeProposalWrapper; import datahub.server.TestDataHubServer; import java.io.IOException; -import java.io.InputStream; import java.net.SocketTimeoutException; +import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -32,117 +32,148 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import javax.net.ssl.SSLHandshakeException; -import org.apache.http.HttpResponse; -import org.apache.http.client.methods.HttpGet; -import 
org.apache.http.client.methods.HttpPost; -import org.apache.http.concurrent.FutureCallback; -import org.apache.http.impl.nio.client.CloseableHttpAsyncClient; -import org.apache.http.impl.nio.client.HttpAsyncClientBuilder; +import org.apache.hc.client5.http.async.methods.SimpleHttpRequest; +import org.apache.hc.client5.http.async.methods.SimpleHttpResponse; +import org.apache.hc.core5.http.Method; import org.junit.Assert; -import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; -import org.mockito.ArgumentCaptor; -import org.mockito.Captor; -import org.mockito.Mock; -import org.mockito.Mockito; import org.mockito.junit.MockitoJUnitRunner; import org.mockserver.matchers.Times; +import org.mockserver.model.HttpError; import org.mockserver.model.HttpRequest; +import org.mockserver.model.HttpResponse; +import org.mockserver.model.HttpStatusCode; import org.mockserver.model.RequestDefinition; +import org.mockserver.verify.VerificationTimes; @RunWith(MockitoJUnitRunner.class) public class RestEmitterTest { - @Mock HttpAsyncClientBuilder mockHttpClientFactory; - - @Mock CloseableHttpAsyncClient mockClient; - - @Captor ArgumentCaptor postArgumentCaptor; - - @Captor ArgumentCaptor callbackCaptor; + @Test + public void testPost() + throws URISyntaxException, IOException, ExecutionException, InterruptedException { + TestDataHubServer testDataHubServer = new TestDataHubServer(); + Integer port = testDataHubServer.getMockServer().getPort(); + RestEmitter emitter = RestEmitter.create(b -> b.server("http://localhost:" + port)); - @Before - public void setupMocks() { - Mockito.when(mockHttpClientFactory.build()).thenReturn(mockClient); + MetadataChangeProposalWrapper mcp = + getMetadataChangeProposalWrapper( + "Test Dataset", "urn:li:dataset:(urn:li:dataPlatform:hive,foo.bar,PROD)"); + Future future = emitter.emit(mcp, null); + MetadataWriteResponse response = future.get(); + String expectedContent = + "{\"proposal\":{\"aspectName\":\"datasetProperties\"," 
+ + "\"entityUrn\":\"urn:li:dataset:(urn:li:dataPlatform:hive,foo.bar,PROD)\"," + + "\"entityType\":\"dataset\",\"changeType\":\"UPSERT\",\"aspect\":{\"contentType\":\"application/json\"" + + ",\"value\":\"{\\\"description\\\":\\\"Test Dataset\\\"}\"}}}"; + testDataHubServer + .getMockServer() + .verify( + request().withHeader("X-RestLi-Protocol-Version", "2.0.0").withBody(expectedContent)); } @Test - public void testPost() throws URISyntaxException, IOException { + public void testPostWithRetry() + throws URISyntaxException, IOException, ExecutionException, InterruptedException { + TestDataHubServer testDataHubServer = new TestDataHubServer(); + Integer port = testDataHubServer.getMockServer().getPort(); + RestEmitterConfig config = + RestEmitterConfig.builder() + .server("http://localhost:" + port) + .maxRetries(3) + .retryIntervalSec(1) + .build(); + RestEmitter emitter = new RestEmitter(config); - RestEmitter emitter = RestEmitter.create(b -> b.asyncHttpClientBuilder(mockHttpClientFactory)); MetadataChangeProposalWrapper mcp = getMetadataChangeProposalWrapper( "Test Dataset", "urn:li:dataset:(urn:li:dataPlatform:hive,foo.bar,PROD)"); - emitter.emit(mcp, null); - Mockito.verify(mockClient).execute(postArgumentCaptor.capture(), callbackCaptor.capture()); - FutureCallback callback = callbackCaptor.getValue(); - Assert.assertNotNull(callback); - HttpPost testPost = postArgumentCaptor.getValue(); - Assert.assertEquals("2.0.0", testPost.getFirstHeader("X-RestLi-Protocol-Version").getValue()); - InputStream is = testPost.getEntity().getContent(); - byte[] contentBytes = new byte[(int) testPost.getEntity().getContentLength()]; - is.read(contentBytes); - String contentString = new String(contentBytes, StandardCharsets.UTF_8); + Future future = emitter.emit(mcp, null); + MetadataWriteResponse response = future.get(); String expectedContent = "{\"proposal\":{\"aspectName\":\"datasetProperties\"," + 
"\"entityUrn\":\"urn:li:dataset:(urn:li:dataPlatform:hive,foo.bar,PROD)\"," + "\"entityType\":\"dataset\",\"changeType\":\"UPSERT\",\"aspect\":{\"contentType\":\"application/json\"" + ",\"value\":\"{\\\"description\\\":\\\"Test Dataset\\\"}\"}}}"; - Assert.assertEquals(expectedContent, contentString); + testDataHubServer + .getMockServer() + .verify( + request().withHeader("X-RestLi-Protocol-Version", "2.0.0").withBody(expectedContent), + VerificationTimes.exactly(1)) + .when( + request() + .withPath("/aspect") + .withHeader("X-RestLi-Protocol-Version", "2.0.0") + .withBody(expectedContent), + Times.exactly(4)) + .respond(HttpResponse.response().withStatusCode(500).withBody("exception")); } @Test public void testExceptions() throws URISyntaxException, IOException, ExecutionException, InterruptedException { - - RestEmitter emitter = RestEmitter.create($ -> $.asyncHttpClientBuilder(mockHttpClientFactory)); + TestDataHubServer testDataHubServer = new TestDataHubServer(); + Integer port = testDataHubServer.getMockServer().getPort(); + RestEmitter emitter = + RestEmitter.create( + b -> + b.server("http://localhost:" + port) + .extraHeaders(Collections.singletonMap("Test-Header", "Test-Value"))); MetadataChangeProposalWrapper mcp = - MetadataChangeProposalWrapper.create( - b -> - b.entityType("dataset") - .entityUrn("urn:li:dataset:(urn:li:dataPlatform:hive,foo.bar,PROD)") - .upsert() - .aspect(new DatasetProperties().setDescription("Test Dataset"))); - - Future mockFuture = Mockito.mock(Future.class); - Mockito.when(mockClient.execute(Mockito.any(), Mockito.any())).thenReturn(mockFuture); - Mockito.when(mockFuture.get()) - .thenThrow(new ExecutionException("Test execution exception", null)); + getMetadataChangeProposalWrapper( + "Test Dataset", "urn:li:dataset:(urn:li:dataPlatform:hive,foo.bar,PROD)"); + Future future = emitter.emit(mcp, null); + MetadataWriteResponse response = future.get(); + String expectedContent = + 
"{\"proposal\":{\"aspectName\":\"datasetProperties\"," + + "\"entityUrn\":\"urn:li:dataset:(urn:li:dataPlatform:hive,foo.bar,PROD)\"," + + "\"entityType\":\"dataset\",\"changeType\":\"UPSERT\",\"aspect\":{\"contentType\":\"application/json\"" + + ",\"value\":\"{\\\"description\\\":\\\"Test Dataset\\\"}\"}}}"; + testDataHubServer + .getMockServer() + .when(request(), Times.once()) + .error(HttpError.error().withDropConnection(true)); + try { emitter.emit(mcp, null).get(); Assert.fail("should not be here"); } catch (ExecutionException e) { - Assert.assertEquals(e.getMessage(), "Test execution exception"); + Assert.assertEquals( + e.getMessage(), + "org.apache.hc.core5.http.ConnectionClosedException: Connection closed by peer"); } } @Test - public void testExtraHeaders() throws Exception { + public void testExtraHeaders() + throws URISyntaxException, IOException, ExecutionException, InterruptedException { + TestDataHubServer testDataHubServer = new TestDataHubServer(); + Integer port = testDataHubServer.getMockServer().getPort(); RestEmitter emitter = RestEmitter.create( b -> - b.asyncHttpClientBuilder(mockHttpClientFactory) + b.server("http://localhost:" + port) .extraHeaders(Collections.singletonMap("Test-Header", "Test-Value"))); - MetadataChangeProposalWrapper mcpw = - MetadataChangeProposalWrapper.create( - b -> - b.entityType("dataset") - .entityUrn("urn:li:dataset:foo") - .upsert() - .aspect(new DatasetProperties())); - Future mockFuture = Mockito.mock(Future.class); - Mockito.when(mockClient.execute(Mockito.any(), Mockito.any())).thenReturn(mockFuture); - emitter.emit(mcpw, null); - Mockito.verify(mockClient).execute(postArgumentCaptor.capture(), callbackCaptor.capture()); - FutureCallback callback = callbackCaptor.getValue(); - Assert.assertNotNull(callback); - HttpPost testPost = postArgumentCaptor.getValue(); - // old headers are not modified - Assert.assertEquals("2.0.0", testPost.getFirstHeader("X-RestLi-Protocol-Version").getValue()); - // new headers 
are added - Assert.assertEquals("Test-Value", testPost.getFirstHeader("Test-Header").getValue()); + + MetadataChangeProposalWrapper mcp = + getMetadataChangeProposalWrapper( + "Test Dataset", "urn:li:dataset:(urn:li:dataPlatform:hive,foo.bar,PROD)"); + Future future = emitter.emit(mcp, null); + MetadataWriteResponse response = future.get(); + String expectedContent = + "{\"proposal\":{\"aspectName\":\"datasetProperties\"," + + "\"entityUrn\":\"urn:li:dataset:(urn:li:dataPlatform:hive,foo.bar,PROD)\"," + + "\"entityType\":\"dataset\",\"changeType\":\"UPSERT\",\"aspect\":{\"contentType\":\"application/json\"" + + ",\"value\":\"{\\\"description\\\":\\\"Test Dataset\\\"}\"}}}"; + testDataHubServer + .getMockServer() + .verify( + request() + .withHeader("Test-Header", "Test-Value") + .withHeader("X-RestLi-Protocol-Version", "2.0.0") + .withBody(expectedContent)); } @Test @@ -168,7 +199,7 @@ public void multithreadedTestExecutors() throws Exception { .withQueryStringParameter("action", "ingestProposal") .withHeader("Content-type", "application/json"), Times.unlimited()) - .respond(org.mockserver.model.HttpResponse.response().withStatusCode(200)); + .respond(HttpResponse.response().withStatusCode(200)); ExecutorService executor = Executors.newFixedThreadPool(10); ArrayList results = new ArrayList(); Random random = new Random(); @@ -476,26 +507,27 @@ public void testUserAgentHeader() throws IOException, ExecutionException, Interr @Test public void testDisableSslVerification() - throws IOException, InterruptedException, ExecutionException { + throws IOException, InterruptedException, ExecutionException, URISyntaxException { RestEmitter restEmitter = new RestEmitter(RestEmitterConfig.builder().disableSslVerification(true).build()); final String hostWithSsl = "https://self-signed.badssl.com"; - final HttpGet request = new HttpGet(hostWithSsl); + final SimpleHttpRequest request = SimpleHttpRequest.create(Method.GET, new URI(hostWithSsl)); - final HttpResponse response = 
restEmitter.getHttpClient().execute(request, null).get(); + final SimpleHttpResponse response = restEmitter.getHttpClient().execute(request, null).get(); restEmitter.close(); - Assert.assertEquals(200, response.getStatusLine().getStatusCode()); + Assert.assertEquals(HttpStatusCode.OK_200.code(), response.getCode()); } @Test public void testSslVerificationException() - throws IOException, InterruptedException, ExecutionException { + throws IOException, InterruptedException, ExecutionException, URISyntaxException { RestEmitter restEmitter = new RestEmitter(RestEmitterConfig.builder().disableSslVerification(false).build()); final String hostWithSsl = "https://self-signed.badssl.com"; - final HttpGet request = new HttpGet(hostWithSsl); + final SimpleHttpRequest request = SimpleHttpRequest.create(Method.GET, new URI(hostWithSsl)); + try { - HttpResponse response = restEmitter.getHttpClient().execute(request, null).get(); + SimpleHttpResponse response = restEmitter.getHttpClient().execute(request, null).get(); Assert.fail(); } catch (Exception e) { Assert.assertTrue(e instanceof ExecutionException); diff --git a/metadata-integration/java/datahub-event/build.gradle b/metadata-integration/java/datahub-event/build.gradle index a516b9d43da4b..395065404d1db 100644 --- a/metadata-integration/java/datahub-event/build.gradle +++ b/metadata-integration/java/datahub-event/build.gradle @@ -25,7 +25,7 @@ dependencies { testImplementation externalDependency.testng testImplementation externalDependency.mockito testImplementation externalDependency.testContainers - testImplementation externalDependency.httpAsyncClient + testImplementation externalDependency.httpClient testRuntimeOnly externalDependency.logbackClassicJava8 } diff --git a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java 
b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java index 038e8d33a97c4..59cac8719c303 100644 --- a/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java +++ b/metadata-integration/java/openlineage-converter/src/main/java/io/datahubproject/openlineage/converter/OpenLineageToDataHub.java @@ -621,6 +621,11 @@ private static void processParentJob( private static void processJobInputs( DatahubJob datahubJob, OpenLineage.RunEvent event, DatahubOpenlineageConfig datahubConf) { + + if (event.getInputs() == null) { + return; + } + for (OpenLineage.InputDataset input : event.getInputs().stream() .filter(input -> input.getFacets() != null) @@ -646,6 +651,11 @@ private static void processJobInputs( private static void processJobOutputs( DatahubJob datahubJob, OpenLineage.RunEvent event, DatahubOpenlineageConfig datahubConf) { + + if (event.getOutputs() == null) { + return; + } + for (OpenLineage.OutputDataset output : event.getOutputs().stream() .filter(input -> input.getFacets() != null) diff --git a/metadata-integration/java/spark-lineage-beta/README.md b/metadata-integration/java/spark-lineage-beta/README.md index 6a520071ba797..e09bc3938b686 100644 --- a/metadata-integration/java/spark-lineage-beta/README.md +++ b/metadata-integration/java/spark-lineage-beta/README.md @@ -24,7 +24,7 @@ When running jobs using spark-submit, the agent needs to be configured in the co ```text #Configuring DataHub spark agent jar -spark.jars.packages io.acryl:acryl-spark-lineage:0.2.1 +spark.jars.packages io.acryl:acryl-spark-lineage:0.2.3 spark.extraListeners datahub.spark.DatahubSparkListener spark.datahub.rest.server http://localhost:8080 ``` @@ -32,7 +32,7 @@ spark.datahub.rest.server http://localhost:8080 ## spark-submit command line ```sh -spark-submit --packages io.acryl:acryl-spark-lineage:0.2.1 --conf 
"spark.extraListeners=datahub.spark.DatahubSparkListener" my_spark_job_to_run.py +spark-submit --packages io.acryl:acryl-spark-lineage:0.2.3 --conf "spark.extraListeners=datahub.spark.DatahubSparkListener" my_spark_job_to_run.py ``` ### Configuration Instructions: Amazon EMR @@ -41,7 +41,7 @@ Set the following spark-defaults configuration properties as it stated [here](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html) ```text -spark.jars.packages io.acryl:acryl-spark-lineage:0.2.1 +spark.jars.packages io.acryl:acryl-spark-lineage:0.2.3 spark.extraListeners datahub.spark.DatahubSparkListener spark.datahub.rest.server https://your_datahub_host/gms #If you have authentication set up then you also need to specify the Datahub access token @@ -56,7 +56,7 @@ When running interactive jobs from a notebook, the listener can be configured wh spark = SparkSession.builder .master("spark://spark-master:7077") .appName("test-application") -.config("spark.jars.packages", "io.acryl:acryl-spark-lineage:0.1.0") +.config("spark.jars.packages", "io.acryl:acryl-spark-lineage:0.2.3") .config("spark.extraListeners", "datahub.spark.DatahubSparkListener") .config("spark.datahub.rest.server", "http://localhost:8080") .enableHiveSupport() @@ -79,7 +79,7 @@ appName("test-application") config("spark.master","spark://spark-master:7077") . -config("spark.jars.packages","io.acryl:acryl-spark-lineage:0.2.1") +config("spark.jars.packages","io.acryl:acryl-spark-lineage:0.2.3") . config("spark.extraListeners","datahub.spark.DatahubSparkListener") @@ -164,6 +164,8 @@ information like tokens. | spark.datahub.rest.server | ✅ | | Datahub server url eg: | | spark.datahub.rest.token | | | Authentication token. | | spark.datahub.rest.disable_ssl_verification | | false | Disable SSL certificate validation. Caution: Only use this if you know what you are doing! 
| +| spark.datahub.rest.max_retries | | 0 | Number of times a failed request is retried | +| spark.datahub.rest.retry_interval | | 10 | Number of seconds to wait between retries | | spark.datahub.metadata.pipeline.platformInstance | | | Pipeline level platform instance | | spark.datahub.metadata.dataset.platformInstance | | | dataset level platform instance | | spark.datahub.metadata.dataset.env | | PROD | [Supported values](https://datahubproject.io/docs/graphql/enums#fabrictype). In all other cases, will fallback to PROD | @@ -180,7 +182,7 @@ information like tokens. | spark.datahub.tags | | | Comma separated list of tags to attach to the DataFlow | | spark.datahub.domains | | | Comma separated list of domain urns to attach to the DataFlow | | spark.datahub.stage_metadata_coalescing | | | Normally it coalesce and send metadata at the onApplicationEnd event which is never called on Databricsk. You should enable this on Databricks if you want coalesced run . | -| spark.datahub.patch.enabled | | | Set this to true to send lineage as a patch, which appends rather than overwrites existing Dataset lineage edges. By default it is enabled. +| spark.datahub.patch.enabled | | false | Set this to true to send lineage as a patch, which appends rather than overwrites existing Dataset lineage edges. By default it is disabled.
| ## What to Expect: The Metadata Model diff --git a/metadata-integration/java/spark-lineage-beta/build.gradle b/metadata-integration/java/spark-lineage-beta/build.gradle index 4cd2ddfec3dfc..d83753028d0b4 100644 --- a/metadata-integration/java/spark-lineage-beta/build.gradle +++ b/metadata-integration/java/spark-lineage-beta/build.gradle @@ -37,7 +37,7 @@ dependencies { provided(externalDependency.sparkSql) provided(externalDependency.sparkHive) implementation 'org.slf4j:slf4j-log4j12:2.0.7' - implementation externalDependency.httpAsyncClient + implementation externalDependency.httpClient implementation externalDependency.logbackClassicJava8 implementation externalDependency.typesafeConfig implementation externalDependency.commonsLang @@ -53,7 +53,7 @@ dependencies { implementation project(path: ':metadata-integration:java:openlineage-converter', configuration: 'shadow') //implementation "io.acryl:datahub-client:0.10.2" - implementation "io.openlineage:openlineage-spark:$openLineageVersion" + implementation "io.openlineage:openlineage-spark_2.12:$openLineageVersion" compileOnly "org.apache.iceberg:iceberg-spark3-runtime:0.12.1" compileOnly "org.apache.spark:spark-sql_2.12:3.1.3" @@ -123,7 +123,7 @@ shadowJar { relocate 'com.fasterxml.jackson', 'datahub.spark2.shaded.jackson' relocate 'org.slf4j', 'datahub.spark2.shaded.org.slf4j' // - relocate 'org.apache.http', 'io.acryl.shaded.http' + relocate 'org.apache.hc', 'io.acryl.shaded.http' relocate 'org.apache.commons.codec', 'datahub.spark2.shaded.o.a.c.codec' relocate 'org.apache.commons.compress', 'datahub.spark2.shaded.o.a.c.compress' relocate 'org.apache.commons.lang3', 'datahub.spark2.shaded.o.a.c.lang3' diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubEventEmitter.java b/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubEventEmitter.java index 6b430c5c2ab26..1dc086e4af585 100644 --- 
a/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubEventEmitter.java +++ b/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubEventEmitter.java @@ -23,8 +23,8 @@ import io.datahubproject.openlineage.dataset.DatahubJob; import io.openlineage.client.OpenLineage; import io.openlineage.client.OpenLineageClientUtils; -import io.openlineage.spark.agent.ArgumentParser; import io.openlineage.spark.agent.EventEmitter; +import io.openlineage.spark.api.SparkOpenLineageConfig; import java.io.IOException; import java.net.URISyntaxException; import java.time.Instant; @@ -44,7 +44,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import lombok.extern.slf4j.Slf4j; -import org.apache.spark.SparkConf; import org.apache.spark.sql.streaming.StreamingQueryProgress; @Slf4j @@ -55,10 +54,11 @@ public class DatahubEventEmitter extends EventEmitter { private final Map schemaMap = new HashMap<>(); private SparkLineageConf datahubConf; - private EventFormatter eventFormatter = new EventFormatter(); + private final EventFormatter eventFormatter = new EventFormatter(); - public DatahubEventEmitter() throws URISyntaxException { - super(ArgumentParser.parse(new SparkConf())); + public DatahubEventEmitter(SparkOpenLineageConfig config, String applicationJobName) + throws URISyntaxException { + super(config, applicationJobName); } private Optional getEmitter() { @@ -167,7 +167,7 @@ public List generateCoalescedMcps() { List mcps = new ArrayList<>(); if (_datahubJobs.isEmpty()) { - log.warn("No lineage events to emit. Maybe the spark job finished premaraturely?"); + log.warn("No lineage events to emit. 
Maybe the spark job finished prematurely?"); return mcps; } diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubSparkListener.java b/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubSparkListener.java index 060402723d194..38de142c4dd17 100644 --- a/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubSparkListener.java +++ b/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/DatahubSparkListener.java @@ -1,6 +1,7 @@ package datahub.spark; import static datahub.spark.conf.SparkConfigParser.*; +import static io.openlineage.spark.agent.util.ScalaConversionUtils.*; import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; @@ -10,16 +11,33 @@ import datahub.spark.conf.SparkAppContext; import datahub.spark.conf.SparkConfigParser; import datahub.spark.conf.SparkLineageConf; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Tag; +import io.micrometer.core.instrument.Tags; +import io.micrometer.core.instrument.composite.CompositeMeterRegistry; +import io.openlineage.client.OpenLineageConfig; +import io.openlineage.client.circuitBreaker.CircuitBreaker; +import io.openlineage.client.circuitBreaker.CircuitBreakerFactory; +import io.openlineage.client.circuitBreaker.NoOpCircuitBreaker; +import io.openlineage.client.metrics.MicrometerProvider; +import io.openlineage.spark.agent.ArgumentParser; import io.openlineage.spark.agent.OpenLineageSparkListener; +import io.openlineage.spark.agent.Versions; import io.openlineage.spark.agent.lifecycle.ContextFactory; +import io.openlineage.spark.agent.util.ScalaConversionUtils; +import io.openlineage.spark.api.SparkOpenLineageConfig; import java.net.URISyntaxException; import java.time.Instant; import java.util.HashMap; import java.util.Map; import java.util.Optional; import java.util.Properties; +import org.apache.spark.SparkConf; +import 
org.apache.spark.SparkContext; +import org.apache.spark.SparkContext$; import org.apache.spark.SparkEnv; import org.apache.spark.SparkEnv$; +import org.apache.spark.package$; import org.apache.spark.scheduler.SparkListener; import org.apache.spark.scheduler.SparkListenerApplicationEnd; import org.apache.spark.scheduler.SparkListenerApplicationStart; @@ -30,20 +48,28 @@ import org.apache.spark.sql.streaming.StreamingQueryListener; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import scala.Function0; +import scala.Option; public class DatahubSparkListener extends SparkListener { private static final Logger log = LoggerFactory.getLogger(DatahubSparkListener.class); private final Map batchLastUpdated = new HashMap(); private final OpenLineageSparkListener listener; - private final DatahubEventEmitter emitter; + private DatahubEventEmitter emitter; private Config datahubConf = ConfigFactory.empty(); private SparkAppContext appContext; + private static ContextFactory contextFactory; + private static CircuitBreaker circuitBreaker = new NoOpCircuitBreaker(); + private static final String sparkVersion = package$.MODULE$.SPARK_VERSION(); + + private final Function0> activeSparkContext = + ScalaConversionUtils.toScalaFn(SparkContext$.MODULE$::getActive); + + private static MeterRegistry meterRegistry; + private boolean isDisabled; public DatahubSparkListener() throws URISyntaxException { listener = new OpenLineageSparkListener(); - emitter = new DatahubEventEmitter(); - ContextFactory contextFactory = new ContextFactory(emitter); - OpenLineageSparkListener.init(contextFactory); } private static SparkAppContext getSparkAppContext( @@ -61,13 +87,14 @@ private static SparkAppContext getSparkAppContext( public void onApplicationStart(SparkListenerApplicationStart applicationStart) { long startTime = System.currentTimeMillis(); + initializeContextFactoryIfNotInitialized(); - log.debug("Application start called"); + log.info("Application start called"); this.appContext 
= getSparkAppContext(applicationStart); listener.onApplicationStart(applicationStart); long elapsedTime = System.currentTimeMillis() - startTime; - log.debug("onApplicationStart completed successfully in {} ms", elapsedTime); + log.info("onApplicationStart completed successfully in {} ms", elapsedTime); } public Optional initializeEmitter(Config sparkConf) { @@ -87,6 +114,17 @@ public Optional initializeEmitter(Config sparkConf) { boolean disableSslVerification = sparkConf.hasPath(SparkConfigParser.DISABLE_SSL_VERIFICATION_KEY) && sparkConf.getBoolean(SparkConfigParser.DISABLE_SSL_VERIFICATION_KEY); + + int retry_interval_in_sec = + sparkConf.hasPath(SparkConfigParser.RETRY_INTERVAL_IN_SEC) + ? sparkConf.getInt(SparkConfigParser.RETRY_INTERVAL_IN_SEC) + : 5; + + int max_retries = + sparkConf.hasPath(SparkConfigParser.MAX_RETRIES) + ? sparkConf.getInt(SparkConfigParser.MAX_RETRIES) + : 0; + log.info( "REST Emitter Configuration: GMS url {}{}", gmsUrl, @@ -94,14 +132,18 @@ public Optional initializeEmitter(Config sparkConf) { if (token != null) { log.info("REST Emitter Configuration: Token {}", "XXXXX"); } + if (disableSslVerification) { log.warn("REST Emitter Configuration: ssl verification will be disabled."); } + RestEmitterConfig restEmitterConf = RestEmitterConfig.builder() .server(gmsUrl) .token(token) .disableSslVerification(disableSslVerification) + .maxRetries(max_retries) + .retryIntervalSec(retry_interval_in_sec) .build(); return Optional.of(new RestDatahubEmitterConfig(restEmitterConf)); } else { @@ -145,7 +187,12 @@ public void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) { if (datahubConf.hasPath(STREAMING_JOB) && (datahubConf.getBoolean(STREAMING_JOB))) { return; } - emitter.emitCoalesced(); + if (emitter != null) { + emitter.emitCoalesced(); + } else { + log.warn("Emitter is not initialized, unable to emit coalesced events"); + } + long elapsedTime = System.currentTimeMillis() - startTime; log.debug("onApplicationEnd completed 
successfully in {} ms", elapsedTime); } @@ -170,6 +217,8 @@ public void onJobEnd(SparkListenerJobEnd jobEnd) { public void onJobStart(SparkListenerJobStart jobStart) { long startTime = System.currentTimeMillis(); + initializeContextFactoryIfNotInitialized(); + log.debug("Job start called"); loadDatahubConfig(this.appContext, jobStart.properties()); listener.onJobStart(jobStart); @@ -227,4 +276,72 @@ public void onOtherEvent(SparkListenerEvent event) { log.debug("onOtherEvent completed successfully in {} ms", elapsedTime); } } + + private static void initializeMetrics(OpenLineageConfig openLineageConfig) { + meterRegistry = + MicrometerProvider.addMeterRegistryFromConfig(openLineageConfig.getMetricsConfig()); + String disabledFacets; + if (openLineageConfig.getFacetsConfig() != null + && openLineageConfig.getFacetsConfig().getDisabledFacets() != null) { + disabledFacets = String.join(";", openLineageConfig.getFacetsConfig().getDisabledFacets()); + } else { + disabledFacets = ""; + } + meterRegistry + .config() + .commonTags( + Tags.of( + Tag.of("openlineage.spark.integration.version", Versions.getVersion()), + Tag.of("openlineage.spark.version", sparkVersion), + Tag.of("openlineage.spark.disabled.facets", disabledFacets))); + ((CompositeMeterRegistry) meterRegistry) + .getRegistries() + .forEach( + r -> + r.config() + .commonTags( + Tags.of( + Tag.of("openlineage.spark.integration.version", Versions.getVersion()), + Tag.of("openlineage.spark.version", sparkVersion), + Tag.of("openlineage.spark.disabled.facets", disabledFacets)))); + } + + private void initializeContextFactoryIfNotInitialized() { + if (contextFactory != null || isDisabled) { + return; + } + asJavaOptional(activeSparkContext.apply()) + .ifPresent(context -> initializeContextFactoryIfNotInitialized(context.appName())); + } + + private void initializeContextFactoryIfNotInitialized(String appName) { + if (contextFactory != null || isDisabled) { + return; + } + SparkEnv sparkEnv = SparkEnv$.MODULE$.get(); 
+ if (sparkEnv == null) { + log.warn( + "OpenLineage listener instantiated, but no configuration could be found. " + + "Lineage events will not be collected"); + return; + } + initializeContextFactoryIfNotInitialized(sparkEnv.conf(), appName); + } + + private void initializeContextFactoryIfNotInitialized(SparkConf sparkConf, String appName) { + if (contextFactory != null || isDisabled) { + return; + } + try { + SparkOpenLineageConfig config = ArgumentParser.parse(sparkConf); + // Needs to be done before initializing OpenLineageClient + initializeMetrics(config); + emitter = new DatahubEventEmitter(config, appName); + contextFactory = new ContextFactory(emitter, meterRegistry, config); + circuitBreaker = new CircuitBreakerFactory(config.getCircuitBreaker()).build(); + OpenLineageSparkListener.init(contextFactory); + } catch (URISyntaxException e) { + log.error("Unable to parse OpenLineage endpoint. Lineage events will not be collected", e); + } + } } diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/conf/SparkConfigParser.java b/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/conf/SparkConfigParser.java index 7e10f51feb38a..f1af56ff888d3 100644 --- a/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/conf/SparkConfigParser.java +++ b/metadata-integration/java/spark-lineage-beta/src/main/java/datahub/spark/conf/SparkConfigParser.java @@ -29,6 +29,9 @@ public class SparkConfigParser { public static final String GMS_URL_KEY = "rest.server"; public static final String GMS_AUTH_TOKEN = "rest.token"; public static final String DISABLE_SSL_VERIFICATION_KEY = "rest.disable_ssl_verification"; + public static final String MAX_RETRIES = "rest.max_retries"; + public static final String RETRY_INTERVAL_IN_SEC = "rest.retry_interval_in_sec"; + public static final String COALESCE_KEY = "coalesce_jobs"; public static final String PATCH_ENABLED = "patch.enabled"; @@ -304,7 +307,7 @@ public static 
boolean isCoalesceEnabled(Config datahubConfig) { public static boolean isPatchEnabled(Config datahubConfig) { if (!datahubConfig.hasPath(PATCH_ENABLED)) { - return true; + return false; } return datahubConfig.hasPath(PATCH_ENABLED) && datahubConfig.getBoolean(PATCH_ENABLED); } diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/lifecycle/OpenLineageRunEventBuilder.java b/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/lifecycle/OpenLineageRunEventBuilder.java deleted file mode 100644 index 99643592dc200..0000000000000 --- a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/lifecycle/OpenLineageRunEventBuilder.java +++ /dev/null @@ -1,493 +0,0 @@ -/* -/* Copyright 2018-2023 contributors to the OpenLineage project -/* SPDX-License-Identifier: Apache-2.0 -*/ - -package io.openlineage.spark.agent.lifecycle; - -import static io.openlineage.client.OpenLineageClientUtils.mergeFacets; -import static io.openlineage.spark.agent.util.ScalaConversionUtils.fromSeq; -import static io.openlineage.spark.agent.util.ScalaConversionUtils.toScalaFn; - -import io.openlineage.client.OpenLineage; -import io.openlineage.client.OpenLineage.DatasetFacet; -import io.openlineage.client.OpenLineage.DatasetFacets; -import io.openlineage.client.OpenLineage.InputDataset; -import io.openlineage.client.OpenLineage.InputDatasetFacet; -import io.openlineage.client.OpenLineage.InputDatasetInputFacets; -import io.openlineage.client.OpenLineage.JobBuilder; -import io.openlineage.client.OpenLineage.JobFacet; -import io.openlineage.client.OpenLineage.OutputDataset; -import io.openlineage.client.OpenLineage.OutputDatasetFacet; -import io.openlineage.client.OpenLineage.OutputDatasetOutputFacets; -import io.openlineage.client.OpenLineage.ParentRunFacet; -import io.openlineage.client.OpenLineage.RunEvent; -import io.openlineage.client.OpenLineage.RunEventBuilder; -import 
io.openlineage.client.OpenLineage.RunFacet; -import io.openlineage.client.OpenLineage.RunFacets; -import io.openlineage.client.OpenLineage.RunFacetsBuilder; -import io.openlineage.spark.agent.hooks.HookUtils; -import io.openlineage.spark.agent.lifecycle.plan.column.ColumnLevelLineageUtils; -import io.openlineage.spark.agent.lifecycle.plan.column.ColumnLevelLineageVisitor; -import io.openlineage.spark.agent.util.FacetUtils; -import io.openlineage.spark.agent.util.PlanUtils; -import io.openlineage.spark.agent.util.ScalaConversionUtils; -import io.openlineage.spark.api.CustomFacetBuilder; -import io.openlineage.spark.api.OpenLineageContext; -import io.openlineage.spark.api.OpenLineageEventHandlerFactory; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import lombok.AllArgsConstructor; -import lombok.NonNull; -import lombok.extern.slf4j.Slf4j; -import org.apache.spark.rdd.RDD; -import org.apache.spark.scheduler.ActiveJob; -import org.apache.spark.scheduler.JobFailed; -import org.apache.spark.scheduler.SparkListenerJobEnd; -import org.apache.spark.scheduler.SparkListenerJobStart; -import org.apache.spark.scheduler.SparkListenerStageCompleted; -import org.apache.spark.scheduler.SparkListenerStageSubmitted; -import org.apache.spark.scheduler.Stage; -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; -import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd; -import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart; -import scala.Function1; -import scala.PartialFunction; - -/** - * Event handler that accepts various {@link org.apache.spark.scheduler.SparkListener} events and - * helps build up an {@link RunEvent} by passing event components to partial functions that know how - * to convert those 
event components into {@link RunEvent} properties. - * - *

The event types that can be consumed to generate @link OpenLineage.RunEvent} properties have - * no common supertype, so the generic argument for the function input is simply {@link Object}. The - * types of arguments that may be found include - * - *

    - *
  • {@link org.apache.spark.scheduler.StageInfo} - *
  • {@link Stage} - *
  • {@link RDD} - *
  • {@link ActiveJob} - *
  • {@link org.apache.spark.sql.execution.QueryExecution} - *
- * - *

These components are extracted from various {@link org.apache.spark.scheduler.SparkListener} - * events, such as {@link SparkListenerStageCompleted}, {@link SparkListenerJobStart}, and {@link - * org.apache.spark.scheduler.SparkListenerTaskEnd}. - * - *

{@link RDD} chains will be _flattened_ so each `RDD` dependency is passed to the builders one - * at a time. This means a builder can directly specify the type of {@link RDD} it handles, such as - * a {@link org.apache.spark.rdd.HadoopRDD} or a {@link - * org.apache.spark.sql.execution.datasources.FileScanRDD}, without having to check the dependencies - * of every {@link org.apache.spark.rdd.MapPartitionsRDD} or {@link - * org.apache.spark.sql.execution.SQLExecutionRDD}. - * - *

Any {@link RunFacet}s and {@link JobFacet}s returned by the {@link CustomFacetBuilder}s are - * appended to the {@link OpenLineage.Run} and {@link OpenLineage.Job}, respectively. - * - *

If any {@link OpenLineage.InputDatasetBuilder}s or {@link - * OpenLineage.OutputDatasetBuilder}s are returned from the partial functions, the {@link - * #inputDatasetBuilders} or {@link #outputDatasetBuilders} will be invoked using the same input - * arguments in order to construct any {@link InputDatasetFacet}s or {@link OutputDatasetFacet}s to - * the returned dataset. {@link InputDatasetFacet}s and {@link OutputDatasetFacet}s will be attached - * to any {@link OpenLineage.InputDatasetBuilder} or {@link OpenLineage.OutputDatasetBuilder} - * found for the event. This is because facets may be constructed from generic information that is - * not specifically tied to a Dataset. For example, {@link - * OpenLineage.OutputStatisticsOutputDatasetFacet}s are created from {@link - * org.apache.spark.executor.TaskMetrics} attached to the last {@link - * org.apache.spark.scheduler.StageInfo} for a given job execution. However, the {@link - * OutputDataset} is constructed by reading the {@link LogicalPlan}. There's no way to tie the - * output metrics in the {@link org.apache.spark.scheduler.StageInfo} to the {@link OutputDataset} - * in the {@link LogicalPlan} except by inference. Similarly, input metrics can be found in the - * {@link org.apache.spark.scheduler.StageInfo} for the stage that reads a dataset and the {@link - * InputDataset} can usually be constructed by walking the {@link RDD} dependency tree for that - * {@link Stage} and finding a {@link org.apache.spark.sql.execution.datasources.FileScanRDD} or - * other concrete implementation. But while there is typically only one {@link InputDataset} read in - * a given stage, there's no guarantee of that and the {@link org.apache.spark.executor.TaskMetrics} - * in the {@link org.apache.spark.scheduler.StageInfo} won't disambiguate. - * - *

If a facet needs to be attached to a specific dataset, the user must take care to construct - * both the Dataset and the Facet in the same builder. - */ -@Slf4j -@AllArgsConstructor -class OpenLineageRunEventBuilder { - - @NonNull private final OpenLineageContext openLineageContext; - - @NonNull - private final Collection>> inputDatasetBuilders; - - @NonNull - private final Collection>> - inputDatasetQueryPlanVisitors; - - @NonNull - private final Collection>> outputDatasetBuilders; - - @NonNull - private final Collection>> - outputDatasetQueryPlanVisitors; - - @NonNull - private final Collection> datasetFacetBuilders; - - @NonNull - private final Collection> - inputDatasetFacetBuilders; - - @NonNull - private final Collection> - outputDatasetFacetBuilders; - - @NonNull private final Collection> runFacetBuilders; - @NonNull private final Collection> jobFacetBuilders; - @NonNull private final Collection columnLineageVisitors; - private final UnknownEntryFacetListener unknownEntryFacetListener = - UnknownEntryFacetListener.getInstance(); - private final Map jobMap = new HashMap<>(); - private final Map stageMap = new HashMap<>(); - - OpenLineageRunEventBuilder(OpenLineageContext context, OpenLineageEventHandlerFactory factory) { - this( - context, - factory.createInputDatasetBuilder(context), - factory.createInputDatasetQueryPlanVisitors(context), - factory.createOutputDatasetBuilder(context), - factory.createOutputDatasetQueryPlanVisitors(context), - factory.createDatasetFacetBuilders(context), - factory.createInputDatasetFacetBuilders(context), - factory.createOutputDatasetFacetBuilders(context), - factory.createRunFacetBuilders(context), - factory.createJobFacetBuilders(context), - factory.createColumnLevelLineageVisitors(context)); - } - - /** - * Add an {@link ActiveJob} and all of its {@link Stage}s to the maps so we can look them up by id - * later. 
- * - * @param job - */ - void registerJob(ActiveJob job) { - jobMap.put(job.jobId(), job); - stageMap.put(job.finalStage().id(), job.finalStage()); - job.finalStage() - .parents() - .forall( - toScalaFn( - stage -> { - stageMap.put(stage.id(), stage); - return true; - })); - } - - RunEvent buildRun( - Optional parentRunFacet, - RunEventBuilder runEventBuilder, - JobBuilder jobBuilder, - SparkListenerStageSubmitted event) { - Stage stage = stageMap.get(event.stageInfo().stageId()); - RDD rdd = stage.rdd(); - - List nodes = new ArrayList<>(); - nodes.addAll(Arrays.asList(event.stageInfo(), stage)); - - nodes.addAll(Rdds.flattenRDDs(rdd)); - - return populateRun(parentRunFacet, runEventBuilder, jobBuilder, nodes); - } - - RunEvent buildRun( - Optional parentRunFacet, - RunEventBuilder runEventBuilder, - JobBuilder jobBuilder, - SparkListenerStageCompleted event) { - Stage stage = stageMap.get(event.stageInfo().stageId()); - RDD rdd = stage.rdd(); - - List nodes = new ArrayList<>(); - nodes.addAll(Arrays.asList(event.stageInfo(), stage)); - - nodes.addAll(Rdds.flattenRDDs(rdd)); - - return populateRun(parentRunFacet, runEventBuilder, jobBuilder, nodes); - } - - RunEvent buildRun( - Optional parentRunFacet, - RunEventBuilder runEventBuilder, - JobBuilder jobBuilder, - SparkListenerSQLExecutionStart event) { - runEventBuilder.eventType(RunEvent.EventType.START); - return buildRun(parentRunFacet, runEventBuilder, jobBuilder, event, Optional.empty()); - } - - RunEvent buildRun( - Optional parentRunFacet, - RunEventBuilder runEventBuilder, - JobBuilder jobBuilder, - SparkListenerSQLExecutionEnd event) { - runEventBuilder.eventType(RunEvent.EventType.COMPLETE); - return buildRun(parentRunFacet, runEventBuilder, jobBuilder, event, Optional.empty()); - } - - RunEvent buildRun( - Optional parentRunFacet, - RunEventBuilder runEventBuilder, - JobBuilder jobBuilder, - SparkListenerJobStart event) { - runEventBuilder.eventType(RunEvent.EventType.START); - return buildRun( - 
parentRunFacet, - runEventBuilder, - jobBuilder, - event, - Optional.ofNullable(jobMap.get(event.jobId()))); - } - - RunEvent buildRun( - Optional parentRunFacet, - RunEventBuilder runEventBuilder, - JobBuilder jobBuilder, - SparkListenerJobEnd event) { - runEventBuilder.eventType( - event.jobResult() instanceof JobFailed - ? RunEvent.EventType.FAIL - : RunEvent.EventType.COMPLETE); - return buildRun( - parentRunFacet, - runEventBuilder, - jobBuilder, - event, - Optional.ofNullable(jobMap.get(event.jobId()))); - } - - private RunEvent buildRun( - Optional parentRunFacet, - RunEventBuilder runEventBuilder, - JobBuilder jobBuilder, - Object event, - Optional job) { - List nodes = new ArrayList<>(); - nodes.add(event); - job.ifPresent( - j -> { - nodes.add(j); - nodes.addAll(Rdds.flattenRDDs(j.finalStage().rdd())); - }); - - return populateRun(parentRunFacet, runEventBuilder, jobBuilder, nodes); - } - - private RunEvent populateRun( - Optional parentRunFacet, - RunEventBuilder runEventBuilder, - JobBuilder jobBuilder, - List nodes) { - OpenLineage openLineage = openLineageContext.getOpenLineage(); - - RunFacetsBuilder runFacetsBuilder = openLineage.newRunFacetsBuilder(); - OpenLineage.JobFacetsBuilder jobFacetsBuilder = - openLineageContext.getOpenLineage().newJobFacetsBuilder(); - - parentRunFacet.ifPresent(runFacetsBuilder::parent); - OpenLineage.JobFacets jobFacets = buildJobFacets(nodes, jobFacetBuilders, jobFacetsBuilder); - List inputDatasets = buildInputDatasets(nodes); - List outputDatasets = buildOutputDatasets(nodes); - openLineageContext - .getQueryExecution() - .filter(qe -> !FacetUtils.isFacetDisabled(openLineageContext, "spark_unknown")) - .flatMap(qe -> unknownEntryFacetListener.build(qe.optimizedPlan())) - .ifPresent(facet -> runFacetsBuilder.put("spark_unknown", facet)); - - RunFacets runFacets = buildRunFacets(nodes, runFacetBuilders, runFacetsBuilder); - OpenLineage.RunBuilder runBuilder = - 
openLineage.newRunBuilder().runId(openLineageContext.getRunUuid()).facets(runFacets); - runEventBuilder - .run(runBuilder.build()) - .job(jobBuilder.facets(jobFacets).build()) - .inputs(inputDatasets) - .outputs(outputDatasets); - - HookUtils.preBuild(openLineageContext, runEventBuilder); - return runEventBuilder.build(); - } - - private List buildInputDatasets(List nodes) { - openLineageContext - .getQueryExecution() - .ifPresent( - qe -> { - if (log.isDebugEnabled()) { - log.debug("Traversing optimized plan {}", qe.optimizedPlan().toJSON()); - log.debug("Physical plan executed {}", qe.executedPlan().toJSON()); - } - }); - log.debug( - "Visiting query plan {} with input dataset builders {}", - openLineageContext.getQueryExecution(), - inputDatasetBuilders); - - Function1> inputVisitor = - visitLogicalPlan(PlanUtils.merge(inputDatasetQueryPlanVisitors)); - - List datasets = - Stream.concat( - buildDatasets(nodes, inputDatasetBuilders), - openLineageContext - .getQueryExecution() - .map( - qe -> - fromSeq(qe.optimizedPlan().map(inputVisitor)).stream() - .flatMap(Collection::stream) - .map(((Class) InputDataset.class)::cast)) - .orElse(Stream.empty())) - .collect(Collectors.toList()); - OpenLineage openLineage = openLineageContext.getOpenLineage(); - if (!datasets.isEmpty()) { - Map inputFacetsMap = new HashMap<>(); - nodes.forEach( - event -> inputDatasetFacetBuilders.forEach(fn -> fn.accept(event, inputFacetsMap::put))); - Map datasetFacetsMap = new HashMap<>(); - nodes.forEach( - event -> inputDatasetFacetBuilders.forEach(fn -> fn.accept(event, inputFacetsMap::put))); - return datasets.stream() - .map( - ds -> - openLineage - .newInputDatasetBuilder() - .name(ds.getName()) - .namespace(ds.getNamespace()) - .inputFacets( - mergeFacets( - inputFacetsMap, ds.getInputFacets(), InputDatasetInputFacets.class)) - .facets(mergeFacets(datasetFacetsMap, ds.getFacets(), DatasetFacets.class)) - .build()) - .collect(Collectors.toList()); - } - return datasets; - } - - /** - * 
Returns a {@link Function1} that passes the input {@link LogicalPlan} node to the {@link - * #unknownEntryFacetListener} if the inputVisitor is defined for the input node. - * - * @param inputVisitor - * @param - * @return - */ - private Function1> visitLogicalPlan( - PartialFunction> inputVisitor) { - return ScalaConversionUtils.toScalaFn( - node -> - inputVisitor - .andThen( - toScalaFn( - ds -> { - unknownEntryFacetListener.accept(node); - return ds; - })) - .applyOrElse(node, toScalaFn(n -> Collections.emptyList()))); - } - - private List buildOutputDatasets(List nodes) { - log.debug( - "Visiting query plan {} with output dataset builders {}", - openLineageContext.getQueryExecution(), - outputDatasetBuilders); - Function1> visitor = - visitLogicalPlan(PlanUtils.merge(outputDatasetQueryPlanVisitors)); - List datasets = - Stream.concat( - buildDatasets(nodes, outputDatasetBuilders), - openLineageContext - .getQueryExecution() - .map(qe -> visitor.apply(qe.optimizedPlan())) - .map(Collection::stream) - .orElse(Stream.empty())) - .collect(Collectors.toList()); - - OpenLineage openLineage = openLineageContext.getOpenLineage(); - - if (!datasets.isEmpty()) { - Map outputFacetsMap = new HashMap<>(); - nodes.forEach( - event -> - outputDatasetFacetBuilders.forEach(fn -> fn.accept(event, outputFacetsMap::put))); - Map datasetFacetsMap = new HashMap<>(); - nodes.forEach( - event -> datasetFacetBuilders.forEach(fn -> fn.accept(event, datasetFacetsMap::put))); - return datasets.stream() - .map( - ds -> { - Map dsFacetsMap = new HashMap(datasetFacetsMap); - ColumnLevelLineageUtils.buildColumnLineageDatasetFacet( - openLineageContext, ds.getFacets().getSchema()) - .ifPresent(facet -> dsFacetsMap.put("columnLineage", facet)); - return openLineage - .newOutputDatasetBuilder() - .name(ds.getName()) - .namespace(ds.getNamespace()) - .outputFacets( - mergeFacets( - outputFacetsMap, ds.getOutputFacets(), OutputDatasetOutputFacets.class)) - .facets(mergeFacets(dsFacetsMap, 
ds.getFacets(), DatasetFacets.class)) - .build(); - }) - .collect(Collectors.toList()); - } - return datasets; - } - - private Stream buildDatasets( - List nodes, Collection>> builders) { - return nodes.stream() - .flatMap( - event -> - builders.stream() - .filter(pfn -> PlanUtils.safeIsDefinedAt(pfn, event)) - .map(pfn -> PlanUtils.safeApply(pfn, event)) - .flatMap(Collection::stream)); - } - - /** - * Attach facets to a facet container, such as an {@link InputDatasetInputFacets} or an {@link - * OutputDatasetOutputFacets}. Facets returned by a {@link CustomFacetBuilder} may be attached to - * a field in the container, such as {@link InputDatasetInputFacets#dataQualityMetrics} or may be - * attached as a key/value pair in the {@link InputDatasetInputFacets#additionalProperties} map. - * The serialized JSON does not distinguish between these, but the java class does. The Java class - * also has some fields, such as the {@link InputDatasetInputFacets#producer} URI, which need to - * be included in the serialized JSON. - * - *

This methods will generate a new facet container with properties potentially overridden by - * the values set by the custom facet generators. - * - * @param events - * @param builders - * @return - */ - private OpenLineage.JobFacets buildJobFacets( - List events, - Collection> builders, - OpenLineage.JobFacetsBuilder jobFacetsBuilder) { - events.forEach(event -> builders.forEach(fn -> fn.accept(event, jobFacetsBuilder::put))); - return jobFacetsBuilder.build(); - } - - private RunFacets buildRunFacets( - List events, - Collection> builders, - RunFacetsBuilder runFacetsBuilder) { - events.forEach(event -> builders.forEach(fn -> fn.accept(event, runFacetsBuilder::put))); - return runFacetsBuilder.build(); - } -} diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/lifecycle/plan/LogicalRelationDatasetBuilder.java b/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/lifecycle/plan/LogicalRelationDatasetBuilder.java deleted file mode 100644 index dd58b9eaf140b..0000000000000 --- a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/lifecycle/plan/LogicalRelationDatasetBuilder.java +++ /dev/null @@ -1,220 +0,0 @@ -/* -/* Copyright 2018-2023 contributors to the OpenLineage project -/* SPDX-License-Identifier: Apache-2.0 -*/ - -package io.openlineage.spark.agent.lifecycle.plan; - -import io.openlineage.client.OpenLineage; -import io.openlineage.client.OpenLineage.DatasetFacetsBuilder; -import io.openlineage.client.utils.DatasetIdentifier; -import io.openlineage.spark.agent.lifecycle.plan.handlers.JdbcRelationHandler; -import io.openlineage.spark.agent.util.PathUtils; -import io.openlineage.spark.agent.util.PlanUtils; -import io.openlineage.spark.api.AbstractQueryPlanDatasetBuilder; -import io.openlineage.spark.api.DatasetFactory; -import io.openlineage.spark.api.OpenLineageContext; -import java.util.ArrayList; -import java.util.Collection; -import 
java.util.Collections; -import java.util.List; -import java.util.Optional; -import java.util.stream.Collectors; -import lombok.extern.slf4j.Slf4j; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.spark.scheduler.SparkListenerEvent; -import org.apache.spark.sql.catalyst.catalog.CatalogTable; -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; -import org.apache.spark.sql.execution.datasources.HadoopFsRelation; -import org.apache.spark.sql.execution.datasources.LogicalRelation; -import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions; -import org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation; -import scala.collection.JavaConversions; - -/** - * {@link LogicalPlan} visitor that attempts to extract a {@link OpenLineage.Dataset} from a {@link - * LogicalRelation}. The {@link org.apache.spark.sql.sources.BaseRelation} is tested for known - * types, such as {@link HadoopFsRelation} or {@link JDBCRelation}s, as those are easy to extract - * exact dataset information. - * - *

For {@link HadoopFsRelation}s, it is assumed that a single directory maps to a single {@link - * OpenLineage.Dataset}. Any files referenced are replaced by their parent directory and all files - * in a given directory are assumed to belong to the same {@link OpenLineage.Dataset}. Directory - * partitioning is currently not addressed. - * - *

For {@link JDBCRelation}s, {@link OpenLineage.Dataset} naming expects the namespace to be the - * JDBC connection URL (schema and authority only) and the table name to be the - * <database> - * .<tableName>. - * - *

{@link CatalogTable}s, if present, can be used to describe the {@link OpenLineage.Dataset} if - * its {@link org.apache.spark.sql.sources.BaseRelation} is unknown. - * - *

TODO If a user specifies the {@link JDBCOptions#JDBC_QUERY_STRING()} option, we do not parse - * the sql to determine the specific tables used. Since we return a List of {@link - * OpenLineage.Dataset}s, we can parse the sql and determine each table referenced to return a - * complete list of datasets referenced. - */ -@Slf4j -public class LogicalRelationDatasetBuilder - extends AbstractQueryPlanDatasetBuilder { - - private final DatasetFactory datasetFactory; - - public LogicalRelationDatasetBuilder( - OpenLineageContext context, DatasetFactory datasetFactory, boolean searchDependencies) { - super(context, searchDependencies); - this.datasetFactory = datasetFactory; - } - - @Override - public boolean isDefinedAtLogicalPlan(LogicalPlan x) { - // if a LogicalPlan is a single node plan like `select * from temp`, - // then it's leaf node and should not be considered output node - if (x instanceof LogicalRelation && isSingleNodeLogicalPlan(x) && !searchDependencies) { - return false; - } - - return x instanceof LogicalRelation - && (((LogicalRelation) x).relation() instanceof HadoopFsRelation - || ((LogicalRelation) x).relation() instanceof JDBCRelation - || ((LogicalRelation) x).catalogTable().isDefined()); - } - - private boolean isSingleNodeLogicalPlan(LogicalPlan x) { - return context - .getQueryExecution() - .map(qe -> qe.optimizedPlan()) - .filter(p -> p.equals(x)) - .isPresent() - && (x.children() == null || x.children().isEmpty()); - } - - @Override - public List apply(LogicalRelation logRel) { - if (logRel.catalogTable() != null && logRel.catalogTable().isDefined()) { - return handleCatalogTable(logRel); - } else if (logRel.relation() instanceof HadoopFsRelation) { - return handleHadoopFsRelation(logRel); - } else if (logRel.relation() instanceof JDBCRelation) { - return new JdbcRelationHandler<>(datasetFactory).handleRelation(logRel); - } - throw new IllegalArgumentException( - "Expected logical plan to be either HadoopFsRelation, JDBCRelation, " - + "or 
CatalogTable but was " - + logRel); - } - - private List handleCatalogTable(LogicalRelation logRel) { - CatalogTable catalogTable = logRel.catalogTable().get(); - - DatasetIdentifier di = PathUtils.fromCatalogTable(catalogTable); - - OpenLineage.DatasetFacetsBuilder datasetFacetsBuilder = - context.getOpenLineage().newDatasetFacetsBuilder(); - datasetFacetsBuilder.schema(PlanUtils.schemaFacet(context.getOpenLineage(), logRel.schema())); - datasetFacetsBuilder.dataSource( - PlanUtils.datasourceFacet(context.getOpenLineage(), di.getNamespace())); - - getDatasetVersion(logRel) - .map( - version -> - datasetFacetsBuilder.version( - context.getOpenLineage().newDatasetVersionDatasetFacet(version))); - - return Collections.singletonList(datasetFactory.getDataset(di, datasetFacetsBuilder)); - } - - private List handleHadoopFsRelation(LogicalRelation x) { - HadoopFsRelation relation = (HadoopFsRelation) x.relation(); - try { - return context - .getSparkSession() - .map( - session -> { - Configuration hadoopConfig = - session.sessionState().newHadoopConfWithOptions(relation.options()); - - DatasetFacetsBuilder datasetFacetsBuilder = - context.getOpenLineage().newDatasetFacetsBuilder(); - getDatasetVersion(x) - .map( - version -> - datasetFacetsBuilder.version( - context.getOpenLineage().newDatasetVersionDatasetFacet(version))); - - Collection rootPaths = - JavaConversions.asJavaCollection(relation.location().rootPaths()); - - if (isSingleFileRelation(rootPaths, hadoopConfig)) { - return Collections.singletonList( - datasetFactory.getDataset( - rootPaths.stream().findFirst().get().toUri(), - relation.schema(), - datasetFacetsBuilder)); - } else { - return rootPaths.stream() - .map(p -> PlanUtils.getDirectoryPath(p, hadoopConfig)) - .distinct() - .map( - p -> { - // TODO- refactor this to return a single partitioned dataset based on - // static - // static partitions in the relation - return datasetFactory.getDataset( - p.toUri(), relation.schema(), datasetFacetsBuilder); - }) 
- .collect(Collectors.toList()); - } - }) - .orElse(Collections.emptyList()); - } catch (Exception e) { - if ("com.databricks.backend.daemon.data.client.adl.AzureCredentialNotFoundExcepgittion" - .equals(e.getClass().getName())) { - // This is a fallback that can occur when hadoop configurations cannot be - // reached. This occurs in Azure Databricks when credential passthrough - // is enabled and you're attempting to get the data lake credentials. - // The Spark Listener context cannot use the user credentials - // thus we need a fallback. - // This is similar to the InsertIntoHadoopRelationVisitor's process for getting - // Datasets - List inputDatasets = new ArrayList(); - List paths = - new ArrayList<>(JavaConversions.asJavaCollection(relation.location().rootPaths())); - for (Path p : paths) { - inputDatasets.add(datasetFactory.getDataset(p.toUri(), relation.schema())); - } - if (inputDatasets.isEmpty()) { - return Collections.emptyList(); - } else { - return inputDatasets; - } - } else { - throw e; - } - } - } - - private boolean isSingleFileRelation(Collection paths, Configuration hadoopConfig) { - if (paths.size() != 1) { - return false; - } - - try { - Path path = paths.stream().findFirst().get(); - return path.getFileSystem(hadoopConfig).isFile(path); - /* - Unfortunately it seems like on DataBricks this can throw an SparkException as well if credentials are missing. - Like org.apache.spark.SparkException: There is no Credential Scope. 
- */ - } catch (Exception e) { - return false; - } - } - - protected Optional getDatasetVersion(LogicalRelation x) { - // not implemented - return Optional.empty(); - } -} diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/PathUtils.java b/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/PathUtils.java deleted file mode 100644 index b72d28ce72dd9..0000000000000 --- a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/PathUtils.java +++ /dev/null @@ -1,207 +0,0 @@ -/* -/* Copyright 2018-2023 contributors to the OpenLineage project -/* SPDX-License-Identifier: Apache-2.0 -*/ - -package io.openlineage.spark.agent.util; - -import com.typesafe.config.Config; -import com.typesafe.config.ConfigFactory; -import datahub.spark.conf.SparkAppContext; -import datahub.spark.conf.SparkConfigParser; -import io.datahubproject.openlineage.config.DatahubOpenlineageConfig; -import io.datahubproject.openlineage.dataset.HdfsPathDataset; -import io.openlineage.client.utils.DatasetIdentifier; -import io.openlineage.client.utils.DatasetIdentifierUtils; -import java.io.File; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.Arrays; -import java.util.Optional; -import java.util.stream.Collectors; -import lombok.SneakyThrows; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.fs.Path; -import org.apache.spark.SparkConf; -import org.apache.spark.sql.SparkSession; -import org.apache.spark.sql.catalyst.TableIdentifier; -import org.apache.spark.sql.catalyst.catalog.CatalogTable; -import org.apache.spark.sql.internal.StaticSQLConf; - -@Slf4j -@SuppressWarnings("checkstyle:HideUtilityClassConstructor") -public class PathUtils { - - private static final String DEFAULT_SCHEME = "file"; - public static final String SPARK_OPENLINEAGE_DATASET_REMOVE_PATH_PATTERN = - 
"spark.openlineage.dataset.removePath.pattern"; - public static final String REMOVE_PATTERN_GROUP = "remove"; - - private static Optional sparkConf = Optional.empty(); - - public static DatasetIdentifier fromPath(Path path) { - return fromPath(path, DEFAULT_SCHEME); - } - - public static DatasetIdentifier fromPath(Path path, String defaultScheme) { - return fromURI(path.toUri(), defaultScheme); - } - - public static DatasetIdentifier fromURI(URI location) { - return fromURI(location, DEFAULT_SCHEME); - } - - public static DatasetIdentifier fromURI(URI location, String defaultScheme) { - DatasetIdentifier di = DatasetIdentifierUtils.fromURI(location, defaultScheme); - return new DatasetIdentifier(removePathPattern(di.getName()), di.getNamespace()); - } - - public static DatasetIdentifier fromCatalogTable(CatalogTable catalogTable) { - return fromCatalogTable(catalogTable, loadSparkConf()); - } - - /** - * Create DatasetIdentifier from CatalogTable, using storage's locationURI if it exists. In other - * way, use defaultTablePath. - */ - @SneakyThrows - public static DatasetIdentifier fromCatalogTable( - CatalogTable catalogTable, Optional sparkConf) { - - DatasetIdentifier di; - if (catalogTable.storage() != null && catalogTable.storage().locationUri().isDefined()) { - di = PathUtils.fromURI(catalogTable.storage().locationUri().get(), DEFAULT_SCHEME); - } else { - // try to obtain location - try { - di = prepareDatasetIdentifierFromDefaultTablePath(catalogTable); - } catch (IllegalStateException e) { - // session inactive - no way to find DatasetProvider - throw new IllegalArgumentException( - "Unable to extract DatasetIdentifier from a CatalogTable", e); - } - } - - Optional metastoreUri = extractMetastoreUri(sparkConf); - // TODO: Is the call to "metastoreUri.get()" really needed? - // Java's Optional should prevent the null in the first place. 
- if (metastoreUri.isPresent() && metastoreUri.get() != null) { - // dealing with Hive tables - DatasetIdentifier symlink = prepareHiveDatasetIdentifier(catalogTable, metastoreUri.get()); - return di.withSymlink( - symlink.getName(), symlink.getNamespace(), DatasetIdentifier.SymlinkType.TABLE); - } else { - return di.withSymlink( - nameFromTableIdentifier(catalogTable.identifier()), - StringUtils.substringBeforeLast(di.getName(), File.separator), - DatasetIdentifier.SymlinkType.TABLE); - } - } - - @SneakyThrows - private static DatasetIdentifier prepareDatasetIdentifierFromDefaultTablePath( - CatalogTable catalogTable) { - URI uri = - SparkSession.active().sessionState().catalog().defaultTablePath(catalogTable.identifier()); - - return PathUtils.fromURI(uri); - } - - @SneakyThrows - private static DatasetIdentifier prepareHiveDatasetIdentifier( - CatalogTable catalogTable, URI metastoreUri) { - String qualifiedName = nameFromTableIdentifier(catalogTable.identifier()); - if (!qualifiedName.startsWith("/")) { - qualifiedName = String.format("/%s", qualifiedName); - } - return PathUtils.fromPath( - new Path(enrichHiveMetastoreURIWithTableName(metastoreUri, qualifiedName))); - } - - @SneakyThrows - public static URI enrichHiveMetastoreURIWithTableName(URI metastoreUri, String qualifiedName) { - return new URI( - "hive", null, metastoreUri.getHost(), metastoreUri.getPort(), qualifiedName, null, null); - } - - /** - * SparkConf does not change through job lifetime but it can get lost once session is closed. 
It's - * good to have it set in case of SPARK-29046 - */ - private static Optional loadSparkConf() { - if (!sparkConf.isPresent() && SparkSession.getDefaultSession().isDefined()) { - sparkConf = Optional.of(SparkSession.getDefaultSession().get().sparkContext().getConf()); - } - return sparkConf; - } - - private static Optional extractMetastoreUri(Optional sparkConf) { - // make sure SparkConf is present - if (!sparkConf.isPresent()) { - return Optional.empty(); - } - - // make sure enableHiveSupport is called - Optional setting = - SparkConfUtils.findSparkConfigKey( - sparkConf.get(), StaticSQLConf.CATALOG_IMPLEMENTATION().key()); - if (!setting.isPresent() || !"hive".equals(setting.get())) { - return Optional.empty(); - } - - return SparkConfUtils.getMetastoreUri(sparkConf.get()); - } - - private static String removeFirstSlashIfSingleSlashInString(String name) { - if (name.chars().filter(x -> x == '/').count() == 1 && name.startsWith("/")) { - return name.substring(1); - } - return name; - } - - private static String removePathPattern(String datasetName) { - // TODO: The reliance on global-mutable state here should be changed - // this led to problems in the PathUtilsTest class, where some tests interfered with others - log.info("Removing path pattern from dataset name {}", datasetName); - Optional conf = loadSparkConf(); - if (!conf.isPresent()) { - return datasetName; - } - try { - String propertiesString = - Arrays.stream(conf.get().getAllWithPrefix("spark.datahub.")) - .map(tup -> tup._1 + "= \"" + tup._2 + "\"") - .collect(Collectors.joining("\n")); - Config datahubConfig = ConfigFactory.parseString(propertiesString); - DatahubOpenlineageConfig datahubOpenlineageConfig = - SparkConfigParser.sparkConfigToDatahubOpenlineageConf( - datahubConfig, new SparkAppContext()); - HdfsPathDataset hdfsPath = - HdfsPathDataset.create(new URI(datasetName), datahubOpenlineageConfig); - log.debug("Transformed path is {}", hdfsPath.getDatasetPath()); - return 
hdfsPath.getDatasetPath(); - } catch (InstantiationException e) { - log.warn( - "Unable to convert dataset {} to path the exception was {}", datasetName, e.getMessage()); - return datasetName; - } catch (URISyntaxException e) { - throw new RuntimeException(e); - } - } - - private static String nameFromTableIdentifier(TableIdentifier identifier) { - // we create name instead of calling `unquotedString` method which includes spark_catalog - // for Spark 3.4 - String name; - if (identifier.database().isDefined()) { - // include database in name - name = String.format("%s.%s", identifier.database().get(), identifier.table()); - } else { - // just table name - name = identifier.table(); - } - - return name; - } -} diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/PlanUtils.java b/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/PlanUtils.java index 8d93b0288b515..d46d741d155b8 100644 --- a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/PlanUtils.java +++ b/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/PlanUtils.java @@ -1,16 +1,19 @@ /* -/* Copyright 2018-2023 contributors to the OpenLineage project +/* Copyright 2018-2024 contributors to the OpenLineage project /* SPDX-License-Identifier: Apache-2.0 */ package io.openlineage.spark.agent.util; +import static io.openlineage.spark.agent.lifecycle.ExecutionContext.CAMEL_TO_SNAKE_CASE; + import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; import datahub.spark.conf.SparkLineageConf; import io.datahubproject.openlineage.dataset.HdfsPathDataset; import io.openlineage.client.OpenLineage; import io.openlineage.spark.agent.Versions; +import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; @@ -18,71 +21,44 @@ import java.util.Collection; import java.util.Collections; 
import java.util.List; +import java.util.Locale; import java.util.Objects; import java.util.Optional; import java.util.UUID; import java.util.stream.Collectors; -import java.util.stream.Stream; import lombok.extern.slf4j.Slf4j; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.FileInputFormat; import org.apache.spark.SparkConf; import org.apache.spark.SparkEnv; -import org.apache.spark.package$; -import org.apache.spark.rdd.HadoopRDD; import org.apache.spark.rdd.RDD; import org.apache.spark.sql.catalyst.expressions.Attribute; -import org.apache.spark.sql.execution.datasources.FileScanRDD; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; import scala.PartialFunction; -import scala.runtime.AbstractPartialFunction; +import scala.PartialFunction$; /** * Utility functions for traversing a {@link * org.apache.spark.sql.catalyst.plans.logical.LogicalPlan}. */ @Slf4j -@SuppressWarnings("checkstyle:HideUtilityClassConstructor") public class PlanUtils { - - public static final String SLASH_DELIMITER_USER_PASSWORD_REGEX = - "[A-Za-z0-9_%]+//?[A-Za-z0-9_%]*@"; - public static final String COLON_DELIMITER_USER_PASSWORD_REGEX = - "([/|,])[A-Za-z0-9_%]+:?[A-Za-z0-9_%]*@"; - - /** - * Merge a list of {@link PartialFunction}s and return the first value where the function is - * defined or empty list if no function matches the input. - * - * @param fns - * @param arg - * @param - * @param - * @return - */ - public static Collection applyAll( - List>> fns, T arg) { - PartialFunction> fn = merge(fns); - if (fn.isDefinedAt(arg)) { - return fn.apply(arg); - } - return Collections.emptyList(); - } - /** * Given a list of {@link PartialFunction}s merge to produce a single function that will test the - * input against each function one by one until a match is found or empty() is returned. 
+ * input against each function one by one until a match is found or {@link + * PartialFunction$#empty()} is returned. * * @param fns * @param * @param * @return */ - public static PartialFunction> merge( + public static OpenLineageAbstractPartialFunction> merge( Collection>> fns) { - return new AbstractPartialFunction>() { + return new OpenLineageAbstractPartialFunction>() { + String appliedClassName; + @Override public boolean isDefinedAt(T x) { return fns.stream() @@ -110,6 +86,7 @@ public Collection apply(T x) { x.getClass().getCanonicalName(), collection); } + appliedClassName = x.getClass().getName(); return collection; } catch (RuntimeException | NoClassDefFoundError | NoSuchMethodError e) { log.error("Apply failed:", e); @@ -120,6 +97,11 @@ public Collection apply(T x) { .flatMap(Collection::stream) .collect(Collectors.toList()); } + + @Override + String appliedName() { + return appliedClassName; + } }; } @@ -204,12 +186,26 @@ public static OpenLineage.ParentRunFacet parentRunFacet( .run(new OpenLineage.ParentRunFacetRunBuilder().runId(parentRunId).build()) .job( new OpenLineage.ParentRunFacetJobBuilder() - .name(parentJob) + .name(parentJob.replaceAll(CAMEL_TO_SNAKE_CASE, "_$1").toLowerCase(Locale.ROOT)) .namespace(parentJobNamespace) .build()) .build(); } + public static Path getDirectoryPathOl(Path p, Configuration hadoopConf) { + try { + if (p.getFileSystem(hadoopConf).getFileStatus(p).isFile()) { + return p.getParent(); + } else { + return p; + } + } catch (IOException e) { + log.warn("Unable to get file system for path ", e); + return p; + } + } + + // This method was replaced to support Datahub PathSpecs public static Path getDirectoryPath(Path p, Configuration hadoopConf) { SparkConf conf = SparkEnv.get().conf(); String propertiesString = @@ -229,17 +225,6 @@ public static Path getDirectoryPath(Path p, Configuration hadoopConf) { log.warn("Unable to convert path to hdfs path {} the exception was {}", p, e.getMessage()); return p; } - - // try { - // 
if (p.getFileSystem(hadoopConf).getFileStatus(p).isFile()) { - // return p.getParent(); - // } else { - // return p; - // } - // } catch (IOException e) { - // log.warn("Unable to get file system for path ", e); - // return p; - // } } /** @@ -251,36 +236,7 @@ public static Path getDirectoryPath(Path p, Configuration hadoopConf) { */ public static List findRDDPaths(List> fileRdds) { return fileRdds.stream() - .flatMap( - rdd -> { - if (rdd instanceof HadoopRDD) { - HadoopRDD hadoopRDD = (HadoopRDD) rdd; - Path[] inputPaths = FileInputFormat.getInputPaths(hadoopRDD.getJobConf()); - Configuration hadoopConf = hadoopRDD.getConf(); - return Arrays.stream(inputPaths) - .map(p -> PlanUtils.getDirectoryPath(p, hadoopConf)); - } else if (rdd instanceof FileScanRDD) { - FileScanRDD fileScanRDD = (FileScanRDD) rdd; - return ScalaConversionUtils.fromSeq(fileScanRDD.filePartitions()).stream() - .flatMap(fp -> Arrays.stream(fp.files())) - .map( - f -> { - if (package$.MODULE$.SPARK_VERSION().compareTo("3.4") > 0) { - // filePath returns SparkPath for Spark 3.4 - return ReflectionUtils.tryExecuteMethod(f, "filePath") - .map(o -> ReflectionUtils.tryExecuteMethod(o, "toPath")) - .map(o -> (Path) o.get()) - .get() - .getParent(); - } else { - return new Path(f.filePath()).getParent(); - } - }); - } else { - log.warn("Unknown RDD class {}", rdd.getClass().getCanonicalName()); - return Stream.empty(); - } - }) + .flatMap(RddPathUtils::findRDDPaths) .distinct() .collect(Collectors.toList()); } @@ -316,11 +272,11 @@ public static boolean safeIsDefinedAt(PartialFunction pfn, Object x) { return false; } catch (Exception e) { if (e != null) { - log.debug("isDefinedAt method failed on {}", e); + log.info("isDefinedAt method failed on {}", e); } return false; } catch (NoClassDefFoundError e) { - log.debug("isDefinedAt method failed on {}", e.getMessage()); + log.info("isDefinedAt method failed on {}", e.getMessage()); return false; } } @@ -331,6 +287,8 @@ public static boolean 
safeIsDefinedAt(PartialFunction pfn, Object x) { * @param pfn * @param x * @return + * @param + * @param */ public static List safeApply(PartialFunction> pfn, D x) { try { diff --git a/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/RemovePathPatternUtils.java b/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/RemovePathPatternUtils.java new file mode 100644 index 0000000000000..a606a44ddd516 --- /dev/null +++ b/metadata-integration/java/spark-lineage-beta/src/main/java/io/openlineage/spark/agent/util/RemovePathPatternUtils.java @@ -0,0 +1,182 @@ +/* +/* Copyright 2018-2024 contributors to the OpenLineage project +/* SPDX-License-Identifier: Apache-2.0 +*/ + +package io.openlineage.spark.agent.util; + +import com.typesafe.config.Config; +import com.typesafe.config.ConfigFactory; +import datahub.spark.conf.SparkAppContext; +import datahub.spark.conf.SparkConfigParser; +import io.datahubproject.openlineage.config.DatahubOpenlineageConfig; +import io.datahubproject.openlineage.dataset.HdfsPathDataset; +import io.openlineage.client.OpenLineage.InputDataset; +import io.openlineage.client.OpenLineage.OutputDataset; +import io.openlineage.spark.api.OpenLineageContext; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang.StringUtils; +import org.apache.spark.SparkConf; +import org.apache.spark.sql.SparkSession; + +/** + * Utility class to handle removing path patterns in dataset names. 
Given a configured regex pattern + * with "remove" group defined, class methods run regex replacements on all the datasets available + * within the event + */ +@Slf4j +public class RemovePathPatternUtils { + public static final String REMOVE_PATTERN_GROUP = "remove"; + public static final String SPARK_OPENLINEAGE_DATASET_REMOVE_PATH_PATTERN = + "spark.openlineage.dataset.removePath.pattern"; + + private static Optional sparkConf = Optional.empty(); + + public static List removeOutputsPathPattern_ol( + OpenLineageContext context, List outputs) { + return getPattern(context) + .map( + pattern -> + outputs.stream() + .map( + dataset -> { + String newName = removePath(pattern, dataset.getName()); + if (newName != dataset.getName()) { + return context + .getOpenLineage() + .newOutputDatasetBuilder() + .name(removePath(pattern, dataset.getName())) + .namespace(dataset.getNamespace()) + .facets(dataset.getFacets()) + .outputFacets(dataset.getOutputFacets()) + .build(); + } else { + return dataset; + } + }) + .collect(Collectors.toList())) + .orElse(outputs); + } + + // This method was replaced to support Datahub PathSpecs + public static List removeOutputsPathPattern( + OpenLineageContext context, List outputs) { + return outputs.stream() + .map( + dataset -> { + String newName = removePathPattern(dataset.getName()); + if (newName != dataset.getName()) { + return context + .getOpenLineage() + .newOutputDatasetBuilder() + .name(newName) + .namespace(dataset.getNamespace()) + .facets(dataset.getFacets()) + .outputFacets(dataset.getOutputFacets()) + .build(); + } else { + return dataset; + } + }) + .collect(Collectors.toList()); + } + + // This method was replaced to support Datahub PathSpecs + public static List removeInputsPathPattern( + OpenLineageContext context, List inputs) { + return inputs.stream() + .map( + dataset -> { + String newName = removePathPattern(dataset.getName()); + if (newName != dataset.getName()) { + return context + .getOpenLineage() + 
.newInputDatasetBuilder() + .name(newName) + .namespace(dataset.getNamespace()) + .facets(dataset.getFacets()) + .inputFacets(dataset.getInputFacets()) + .build(); + } else { + return dataset; + } + }) + .collect(Collectors.toList()); + } + + private static Optional getPattern(OpenLineageContext context) { + return Optional.ofNullable(context.getSparkContext()) + .map(sparkContext -> sparkContext.conf()) + .filter(conf -> conf.contains(SPARK_OPENLINEAGE_DATASET_REMOVE_PATH_PATTERN)) + .map(conf -> conf.get(SPARK_OPENLINEAGE_DATASET_REMOVE_PATH_PATTERN)) + .map(pattern -> Pattern.compile(pattern)); + } + + private static String removePath(Pattern pattern, String name) { + return Optional.ofNullable(pattern.matcher(name)) + .filter(matcher -> matcher.find()) + .filter( + matcher -> { + try { + matcher.group(REMOVE_PATTERN_GROUP); + return true; + } catch (IllegalStateException | IllegalArgumentException e) { + return false; + } + }) + .filter(matcher -> StringUtils.isNotEmpty(matcher.group(REMOVE_PATTERN_GROUP))) + .map( + matcher -> + name.substring(0, matcher.start(REMOVE_PATTERN_GROUP)) + + name.substring(matcher.end(REMOVE_PATTERN_GROUP), name.length())) + .orElse(name); + } + + /** + * SparkConf does not change through job lifetime but it can get lost once session is closed. 
It's + * good to have it set in case of SPARK-29046 + */ + private static Optional loadSparkConf() { + if (!sparkConf.isPresent() && SparkSession.getDefaultSession().isDefined()) { + sparkConf = Optional.of(SparkSession.getDefaultSession().get().sparkContext().getConf()); + } + return sparkConf; + } + + private static String removePathPattern(String datasetName) { + // TODO: The reliance on global-mutable state here should be changed + // this led to problems in the PathUtilsTest class, where some tests interfered with others + log.info("Removing path pattern from dataset name {}", datasetName); + Optional conf = loadSparkConf(); + if (!conf.isPresent()) { + return datasetName; + } + try { + String propertiesString = + Arrays.stream(conf.get().getAllWithPrefix("spark.datahub.")) + .map(tup -> tup._1 + "= \"" + tup._2 + "\"") + .collect(Collectors.joining("\n")); + Config datahubConfig = ConfigFactory.parseString(propertiesString); + DatahubOpenlineageConfig datahubOpenlineageConfig = + SparkConfigParser.sparkConfigToDatahubOpenlineageConf( + datahubConfig, new SparkAppContext()); + HdfsPathDataset hdfsPath = + HdfsPathDataset.create(new URI(datasetName), datahubOpenlineageConfig); + log.debug("Transformed path is {}", hdfsPath.getDatasetPath()); + return hdfsPath.getDatasetPath(); + } catch (InstantiationException e) { + log.warn( + "Unable to convert dataset {} to path the exception was {}", datasetName, e.getMessage()); + return datasetName; + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + } +} diff --git a/metadata-integration/java/spark-lineage-beta/src/test/resources/ol_events/sample_spark.json b/metadata-integration/java/spark-lineage-beta/src/test/resources/ol_events/sample_spark.json index 77a6ebc4044bd..acb7f585e98c9 100644 --- a/metadata-integration/java/spark-lineage-beta/src/test/resources/ol_events/sample_spark.json +++ b/metadata-integration/java/spark-lineage-beta/src/test/resources/ol_events/sample_spark.json @@ -40,7 +40,7 
@@ "inputs": [ { "namespace": "file", - "name": "/Users/treff7es/shadow/spark-test/people.json", + "name": "/my_folder/spark-test/people.json", "facets": { "dataSource": { "_producer": "https://github.com/OpenLineage/OpenLineage/tree/1.2.2/integration/spark", @@ -69,7 +69,7 @@ "outputs": [ { "namespace": "file", - "name": "/Users/treff7es/shadow/spark-test/result", + "name": "/my_folder/shadow/spark-test/result", "facets": { "dataSource": { "_producer": "https://github.com/OpenLineage/OpenLineage/tree/1.2.2/integration/spark", @@ -95,7 +95,7 @@ "inputFields": [ { "namespace": "file", - "name": "/Users/treff7es/shadow/spark-test/people.json", + "name": "/my_folder/spark-test/people.json", "field": "name" } ] diff --git a/metadata-integration/java/spark-lineage/build.gradle b/metadata-integration/java/spark-lineage/build.gradle index 1b3c87288abf8..8db8a09f8cc81 100644 --- a/metadata-integration/java/spark-lineage/build.gradle +++ b/metadata-integration/java/spark-lineage/build.gradle @@ -48,7 +48,7 @@ dependencies { provided(externalDependency.sparkSql) provided(externalDependency.sparkHive) - implementation externalDependency.httpAsyncClient + implementation externalDependency.httpClient // Tests need a concrete log4j available. 
Providing it here testImplementation 'org.apache.logging.log4j:log4j-api:2.17.1' @@ -106,7 +106,7 @@ shadowJar { relocate 'com.fasterxml.jackson', 'datahub.shaded.jackson' relocate 'org.slf4j','datahub.shaded.org.slf4j' - relocate 'org.apache.http','datahub.spark2.shaded.http' + relocate 'org.apache.hc','datahub.spark2.shaded.http' relocate 'org.apache.commons.codec', 'datahub.spark2.shaded.o.a.c.codec' relocate 'org.apache.commons.compress', 'datahub.spark2.shaded.o.a.c.compress' relocate 'org.apache.commons.io', 'datahub.spark2.shaded.o.a.c.io' diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 532395f158c02..5bd73c844b380 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -10,6 +10,7 @@ configurations { dependencies { implementation project(':entity-registry') implementation project(':metadata-service:auth-config') + api project(':metadata-io:metadata-io-api') api project(':metadata-utils') api project(':metadata-events:mxe-avro') api project(':metadata-events:mxe-registration') @@ -25,7 +26,8 @@ dependencies { implementation externalDependency.guava implementation externalDependency.reflections - implementation externalDependency.jsonPatch + + implementation 'com.github.java-json-tools:json-patch:1.13' // TODO: Replace with jakarta.json api(externalDependency.dgraph4j) { exclude group: 'com.google.guava', module: 'guava' exclude group: 'io.grpc', module: 'grpc-protobuf' diff --git a/metadata-io/metadata-io-api/README.txt b/metadata-io/metadata-io-api/README.txt new file mode 100644 index 0000000000000..a9d52d55341a8 --- /dev/null +++ b/metadata-io/metadata-io-api/README.txt @@ -0,0 +1,4 @@ +# :metadata-io:metadata-io-api + +This module exists in order to isolate dependencies when used in external projects. For example, +a custom plugin implementing a custom validator, mutator, or side-effect. 
\ No newline at end of file diff --git a/metadata-io/metadata-io-api/build.gradle b/metadata-io/metadata-io-api/build.gradle new file mode 100644 index 0000000000000..bd79e8cb3ddef --- /dev/null +++ b/metadata-io/metadata-io-api/build.gradle @@ -0,0 +1,11 @@ +plugins { + id 'java-library' +} + +dependencies { + implementation project(':entity-registry') + implementation project(':metadata-service:services') + implementation project(':metadata-utils') + compileOnly externalDependency.lombok + annotationProcessor externalDependency.lombok +} diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java new file mode 100644 index 0000000000000..656534e24f551 --- /dev/null +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityApiUtils.java @@ -0,0 +1,60 @@ +package com.linkedin.metadata.entity; + +import static com.linkedin.metadata.Constants.DEFAULT_RUN_ID; + +import com.datahub.util.RecordUtils; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.metadata.utils.GenericRecordUtils; +import com.linkedin.metadata.utils.PegasusUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class EntityApiUtils { + + private EntityApiUtils() {} + + @Nonnull + public static String toJsonAspect(@Nonnull final RecordTemplate aspectRecord) { + return RecordUtils.toJsonString(aspectRecord); + } + + public static RecordTemplate buildKeyAspect( + 
@Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { + final EntitySpec spec = entityRegistry.getEntitySpec(PegasusUtils.urnToEntityName(urn)); + final AspectSpec keySpec = spec.getKeyAspectSpec(); + return EntityKeyUtils.convertUrnToEntityKey(urn, keySpec); + } + + public static SystemMetadata parseSystemMetadata(String jsonSystemMetadata) { + if (jsonSystemMetadata == null || jsonSystemMetadata.equals("")) { + SystemMetadata response = new SystemMetadata(); + response.setRunId(DEFAULT_RUN_ID); + response.setLastObserved(0); + return response; + } + return RecordUtils.toRecordTemplate(SystemMetadata.class, jsonSystemMetadata); + } + + public static MetadataChangeProposal buildMCP( + Urn entityUrn, String aspectName, ChangeType changeType, @Nullable T aspect) { + MetadataChangeProposal proposal = new MetadataChangeProposal(); + proposal.setEntityUrn(entityUrn); + proposal.setChangeType(changeType); + proposal.setEntityType(entityUrn.getEntityType()); + proposal.setAspectName(aspectName); + if (aspect != null) { + proposal.setAspect(GenericRecordUtils.serializeAspect(aspect)); + } + return proposal; + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java similarity index 92% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java index ae1b3007ed647..cba770d841b94 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/EntityAspect.java @@ -1,7 +1,5 @@ package com.linkedin.metadata.entity; -import static com.linkedin.metadata.entity.EntityUtils.parseSystemMetadata; - import com.datahub.util.RecordUtils; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -53,10 +51,6 @@ 
public class EntityAspect { private String createdFor; - public EntityAspectIdentifier getAspectIdentifier() { - return new EntityAspectIdentifier(getUrn(), getAspect(), getVersion()); - } - /** * Provide a typed EntityAspect without breaking the existing public contract with generic types. */ @@ -110,11 +104,7 @@ public long getVersion() { @Nullable public SystemMetadata getSystemMetadata() { - return parseSystemMetadata(getSystemMetadataRaw()); - } - - public EntityAspectIdentifier getAspectIdentifier() { - return entityAspect.getAspectIdentifier(); + return EntityApiUtils.parseSystemMetadata(getSystemMetadataRaw()); } /** diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java similarity index 90% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java index ad1e26575d7c0..0914df744e413 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/AspectsBatchImpl.java @@ -11,7 +11,6 @@ import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.util.Pair; -import java.util.ArrayList; import java.util.Collection; import java.util.LinkedList; import java.util.List; @@ -23,7 +22,6 @@ import lombok.Builder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; @Slf4j @Getter @@ -156,20 +154,4 @@ public int hashCode() { public String toString() { return "AspectsBatchImpl{" + "items=" + items + '}'; } - - public String toAbbreviatedString(int maxWidth) { - List itemsAbbreviated = new 
ArrayList(); - items.forEach( - item -> { - if (item instanceof ChangeItemImpl) { - itemsAbbreviated.add(((ChangeItemImpl) item).toAbbreviatedString()); - } else { - itemsAbbreviated.add(item.toString()); - } - }); - return "AspectsBatchImpl{" - + "items=" - + StringUtils.abbreviate(itemsAbbreviated.toString(), maxWidth) - + '}'; - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java similarity index 82% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java index e84a7e8a0ab51..30e9251982f10 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/ChangeItemImpl.java @@ -1,9 +1,6 @@ package com.linkedin.metadata.entity.ebean.batch; -import static com.linkedin.metadata.entity.AspectUtils.validateAspect; - import com.datahub.util.exception.ModelConversionException; -import com.github.fge.jsonpatch.JsonPatchException; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; @@ -13,9 +10,10 @@ import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate; +import com.linkedin.metadata.entity.AspectUtils; +import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.EntityAspect; -import com.linkedin.metadata.entity.EntityUtils; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.AspectSpec; import 
com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.utils.EntityKeyUtils; @@ -33,7 +31,6 @@ import lombok.Setter; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; @Slf4j @Getter @@ -54,7 +51,7 @@ public static ChangeItemImpl fromPatch( try { builder.recordTemplate(genericPatchTemplate.applyPatch(currentValue)); - } catch (JsonPatchException | IOException e) { + } catch (IOException e) { throw new RuntimeException(e); } @@ -90,26 +87,33 @@ public static ChangeItemImpl fromPatch( public SystemAspect getSystemAspect(@Nullable Long version) { EntityAspect entityAspect = new EntityAspect(); entityAspect.setAspect(getAspectName()); - entityAspect.setMetadata(EntityUtils.toJsonAspect(getRecordTemplate())); + entityAspect.setMetadata(EntityApiUtils.toJsonAspect(getRecordTemplate())); entityAspect.setUrn(getUrn().toString()); entityAspect.setVersion(version == null ? getNextAspectVersion() : version); entityAspect.setCreatedOn(new Timestamp(getAuditStamp().getTime())); entityAspect.setCreatedBy(getAuditStamp().getActor().toString()); - entityAspect.setSystemMetadata(EntityUtils.toJsonAspect(getSystemMetadata())); + entityAspect.setSystemMetadata(EntityApiUtils.toJsonAspect(getSystemMetadata())); return EntityAspect.EntitySystemAspect.builder() .build(getEntitySpec(), getAspectSpec(), entityAspect); } @Nonnull public MetadataChangeProposal getMetadataChangeProposal() { - final MetadataChangeProposal mcp = new MetadataChangeProposal(); - mcp.setEntityUrn(getUrn()); - mcp.setChangeType(getChangeType()); - mcp.setEntityType(getEntitySpec().getName()); - mcp.setAspectName(getAspectName()); - mcp.setAspect(GenericRecordUtils.serializeAspect(getRecordTemplate())); - mcp.setSystemMetadata(getSystemMetadata()); - return mcp; + if (metadataChangeProposal != null) { + return metadataChangeProposal; + } else { + final MetadataChangeProposal mcp = new MetadataChangeProposal(); + mcp.setEntityUrn(getUrn()); + 
mcp.setChangeType(getChangeType()); + mcp.setEntityType(getEntitySpec().getName()); + mcp.setAspectName(getAspectName()); + mcp.setAspect(GenericRecordUtils.serializeAspect(getRecordTemplate())); + mcp.setSystemMetadata(getSystemMetadata()); + mcp.setEntityKeyAspect( + GenericRecordUtils.serializeAspect( + EntityKeyUtils.convertUrnToEntityKey(getUrn(), entitySpec.getKeyAspectSpec()))); + return mcp; + } } public static class ChangeItemImplBuilder { @@ -129,16 +133,16 @@ public ChangeItemImpl build(AspectRetriever aspectRetriever) { // Apply change type default this.changeType = validateOrDefaultChangeType(changeType); - ValidationUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); + ValidationApiUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); log.debug("entity type = {}", this.urn.getEntityType()); entitySpec(aspectRetriever.getEntityRegistry().getEntitySpec(this.urn.getEntityType())); log.debug("entity spec = {}", this.entitySpec); - aspectSpec(ValidationUtils.validate(this.entitySpec, this.aspectName)); + aspectSpec(ValidationApiUtils.validate(this.entitySpec, this.aspectName)); log.debug("aspect spec = {}", this.aspectSpec); - ValidationUtils.validateRecordTemplate( + ValidationApiUtils.validateRecordTemplate( this.entitySpec, this.urn, this.recordTemplate, aspectRetriever); return new ChangeItemImpl( @@ -161,7 +165,7 @@ public static ChangeItemImpl build( log.debug("entity type = {}", mcp.getEntityType()); EntitySpec entitySpec = aspectRetriever.getEntityRegistry().getEntitySpec(mcp.getEntityType()); - AspectSpec aspectSpec = validateAspect(mcp, entitySpec); + AspectSpec aspectSpec = AspectUtils.validateAspect(mcp, entitySpec); if (!MCPItem.isValidChangeType(ChangeType.UPSERT, aspectSpec)) { throw new UnsupportedOperationException( @@ -191,9 +195,9 @@ public static ChangeItemImpl build( // specific to impl, other impls support PATCH, etc private static ChangeType validateOrDefaultChangeType(@Nullable ChangeType changeType) { 
final ChangeType finalChangeType = changeType == null ? ChangeType.UPSERT : changeType; - if (!CHANGE_TYPES.contains(finalChangeType)) { + if (!MCPItem.CHANGE_TYPES.contains(finalChangeType)) { throw new IllegalArgumentException( - String.format("ChangeType %s not in %s", changeType, CHANGE_TYPES)); + String.format("ChangeType %s not in %s", changeType, MCPItem.CHANGE_TYPES)); } return finalChangeType; } @@ -205,7 +209,7 @@ private static RecordTemplate convertToRecordTemplate( aspect = GenericRecordUtils.deserializeAspect( mcp.getAspect().getValue(), mcp.getAspect().getContentType(), aspectSpec); - ValidationUtils.validateOrThrow(aspect); + ValidationApiUtils.validateOrThrow(aspect); } catch (ModelConversionException e) { throw new RuntimeException( String.format( @@ -252,20 +256,4 @@ public String toString() { + systemMetadata + '}'; } - - public String toAbbreviatedString() { - return "ChangeItemImpl{" - + "changeType=" - + changeType - + ", urn=" - + urn - + ", aspectName='" - + aspectName - + '\'' - + ", recordTemplate=" - + StringUtils.abbreviate(recordTemplate.toString(), 256) - + ", systemMetadata=" - + StringUtils.abbreviate(systemMetadata.toString(), 128) - + '}'; - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java similarity index 90% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java index 0ab854198a282..9c1ded284fa0b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/DeleteItemImpl.java @@ -7,9 +7,9 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.SystemAspect; 
import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.EntityAspect; -import com.linkedin.metadata.entity.EntityUtils; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.mxe.MetadataChangeProposal; @@ -63,7 +63,7 @@ public SystemMetadata getSystemMetadata() { @Nullable @Override public MetadataChangeProposal getMetadataChangeProposal() { - return EntityUtils.buildMCP(getUrn(), aspectName, getChangeType(), null); + return EntityApiUtils.buildMCP(getUrn(), aspectName, getChangeType(), null); } @Nonnull @@ -96,13 +96,13 @@ private DeleteItemImpl build() { @SneakyThrows public DeleteItemImpl build(AspectRetriever aspectRetriever) { - ValidationUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); + ValidationApiUtils.validateUrn(aspectRetriever.getEntityRegistry(), this.urn); log.debug("entity type = {}", this.urn.getEntityType()); entitySpec(aspectRetriever.getEntityRegistry().getEntitySpec(this.urn.getEntityType())); log.debug("entity spec = {}", this.entitySpec); - aspectSpec(ValidationUtils.validate(this.entitySpec, this.aspectName)); + aspectSpec(ValidationApiUtils.validate(this.entitySpec, this.aspectName)); log.debug("aspect spec = {}", this.aspectSpec); return new DeleteItemImpl( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java similarity index 92% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java index 6efc1e78b543c..94d60d2f67c9c 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/MCLItemImpl.java @@ -7,7 +7,7 @@ import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.MCLItem; import com.linkedin.metadata.entity.AspectUtils; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -63,20 +63,21 @@ public MCLItemImpl build(AspectRetriever aspectRetriever) { EntityKeyUtils.getUrnFromLog( this.metadataChangeLog, this.entitySpec.getKeyAspectSpec()); } - ValidationUtils.validateUrn(entityRegistry, urn); + ValidationApiUtils.validateUrn(entityRegistry, urn); log.debug("entity type = {}", urn.getEntityType()); entitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); log.debug("entity spec = {}", this.entitySpec); - aspectSpec(ValidationUtils.validate(this.entitySpec, this.metadataChangeLog.getAspectName())); + aspectSpec( + ValidationApiUtils.validate(this.entitySpec, this.metadataChangeLog.getAspectName())); log.debug("aspect spec = {}", this.aspectSpec); Pair aspects = convertToRecordTemplate(this.metadataChangeLog, aspectSpec); // validate new - ValidationUtils.validateRecordTemplate( + ValidationApiUtils.validateRecordTemplate( this.entitySpec, urn, aspects.getFirst(), aspectRetriever); return new MCLItemImpl( @@ -107,7 +108,7 @@ private static Pair convertToRecordTemplate( aspect = GenericRecordUtils.deserializeAspect( mcl.getAspect().getValue(), mcl.getAspect().getContentType(), aspectSpec); - ValidationUtils.validateOrThrow(aspect); + ValidationApiUtils.validateOrThrow(aspect); } else { aspect = null; } @@ -118,7 +119,7 @@ private static Pair convertToRecordTemplate( 
mcl.getPreviousAspectValue().getValue(), mcl.getPreviousAspectValue().getContentType(), aspectSpec); - ValidationUtils.validateOrThrow(prevAspect); + ValidationApiUtils.validateOrThrow(prevAspect); } else { prevAspect = null; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java similarity index 90% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java index cf9c3978e3a37..f4473c8db3148 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/ebean/batch/PatchItemImpl.java @@ -8,9 +8,6 @@ import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; -import com.github.fge.jsonpatch.JsonPatch; -import com.github.fge.jsonpatch.JsonPatchException; -import com.github.fge.jsonpatch.Patch; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; @@ -19,7 +16,7 @@ import com.linkedin.metadata.aspect.batch.MCPItem; import com.linkedin.metadata.aspect.batch.PatchMCP; import com.linkedin.metadata.aspect.patch.template.AspectTemplateEngine; -import com.linkedin.metadata.entity.validation.ValidationUtils; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -27,7 +24,9 @@ import com.linkedin.metadata.utils.SystemMetadataUtils; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.mxe.SystemMetadata; -import 
java.io.IOException; +import jakarta.json.Json; +import jakarta.json.JsonPatch; +import java.io.StringReader; import java.nio.charset.StandardCharsets; import java.util.Objects; import javax.annotation.Nonnull; @@ -59,7 +58,7 @@ public class PatchItemImpl implements PatchMCP { private final SystemMetadata systemMetadata; private final AuditStamp auditStamp; - private final Patch patch; + private final JsonPatch patch; private final MetadataChangeProposal metadataChangeProposal; @@ -108,7 +107,7 @@ public ChangeItemImpl applyPatch(RecordTemplate recordTemplate, AspectRetriever try { builder.recordTemplate( aspectTemplateEngine.applyPatch(currentValue, getPatch(), getAspectSpec())); - } catch (JsonProcessingException | JsonPatchException e) { + } catch (JsonProcessingException e) { throw new RuntimeException(e); } @@ -123,13 +122,13 @@ public PatchItemImpl.PatchItemImplBuilder systemMetadata(SystemMetadata systemMe } public PatchItemImpl build(EntityRegistry entityRegistry) { - ValidationUtils.validateUrn(entityRegistry, this.urn); + ValidationApiUtils.validateUrn(entityRegistry, this.urn); log.debug("entity type = {}", this.urn.getEntityType()); entitySpec(entityRegistry.getEntitySpec(this.urn.getEntityType())); log.debug("entity spec = {}", this.entitySpec); - aspectSpec(ValidationUtils.validate(this.entitySpec, this.aspectName)); + aspectSpec(ValidationApiUtils.validate(this.entitySpec, this.aspectName)); log.debug("aspect spec = {}", this.aspectSpec); if (this.patch == null) { @@ -178,12 +177,14 @@ public static PatchItemImpl build( .build(entityRegistry); } - private static Patch convertToJsonPatch(MetadataChangeProposal mcp) { + private static JsonPatch convertToJsonPatch(MetadataChangeProposal mcp) { JsonNode json; try { - json = OBJECT_MAPPER.readTree(mcp.getAspect().getValue().asString(StandardCharsets.UTF_8)); - return JsonPatch.fromJson(json); - } catch (IOException e) { + return Json.createPatch( + Json.createReader( + new 
StringReader(mcp.getAspect().getValue().asString(StandardCharsets.UTF_8))) + .readArray()); + } catch (RuntimeException e) { throw new IllegalArgumentException("Invalid JSON Patch: " + mcp.getAspect().getValue(), e); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/EntityRegistryUrnValidator.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/RecordTemplateValidator.java diff --git a/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java new file mode 100644 index 0000000000000..ed79f23823a84 --- /dev/null +++ b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationApiUtils.java @@ -0,0 +1,124 @@ +package com.linkedin.metadata.entity.validation; + +import com.linkedin.common.urn.Urn; +import com.linkedin.data.schema.validation.ValidationResult; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.entity.EntityApiUtils; +import com.linkedin.metadata.models.AspectSpec; +import 
com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import java.net.URISyntaxException; +import java.net.URLEncoder; +import java.util.function.Consumer; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class ValidationApiUtils { + public static final int URN_NUM_BYTES_LIMIT = 512; + public static final String URN_DELIMITER_SEPARATOR = "␟"; + + /** + * Validates a {@link RecordTemplate} and throws {@link ValidationException} if validation fails. + * + * @param record record to be validated. + */ + public static void validateOrThrow(RecordTemplate record) { + RecordTemplateValidator.validate( + record, + validationResult -> { + throw new ValidationException( + String.format( + "Failed to validate record with class %s: %s", + record.getClass().getName(), validationResult.getMessages().toString())); + }); + } + + public static void validateUrn(@Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { + EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); + validator.setCurrentEntitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); + RecordTemplateValidator.validate( + EntityApiUtils.buildKeyAspect(entityRegistry, urn), + validationResult -> { + throw new IllegalArgumentException( + "Invalid urn: " + urn + "\n Cause: " + validationResult.getMessages()); + }, + validator); + + if (urn.toString().trim().length() != urn.toString().length()) { + throw new IllegalArgumentException( + "Error: cannot provide an URN with leading or trailing whitespace"); + } + if (URLEncoder.encode(urn.toString()).length() > URN_NUM_BYTES_LIMIT) { + throw new IllegalArgumentException( + "Error: cannot provide an URN longer than " + + Integer.toString(URN_NUM_BYTES_LIMIT) + + " bytes (when URL encoded)"); + } + if (urn.toString().contains(URN_DELIMITER_SEPARATOR)) { + throw new IllegalArgumentException( + "Error: URN cannot 
contain " + URN_DELIMITER_SEPARATOR + " character"); + } + try { + Urn.createFromString(urn.toString()); + } catch (URISyntaxException e) { + throw new IllegalArgumentException(e); + } + } + + /** + * Validates a {@link RecordTemplate} and logs a warning if validation fails. + * + * @param record record to be validated.ailure. + */ + public static void validateOrWarn(RecordTemplate record) { + RecordTemplateValidator.validate( + record, + validationResult -> { + log.warn(String.format("Failed to validate record %s against its schema.", record)); + }); + } + + public static AspectSpec validate(EntitySpec entitySpec, String aspectName) { + if (aspectName == null || aspectName.isEmpty()) { + throw new UnsupportedOperationException( + "Aspect name is required for create and update operations"); + } + + AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); + + if (aspectSpec == null) { + throw new RuntimeException( + String.format("Unknown aspect %s for entity %s", aspectName, entitySpec.getName())); + } + + return aspectSpec; + } + + public static void validateRecordTemplate( + EntitySpec entitySpec, + Urn urn, + @Nullable RecordTemplate aspect, + @Nonnull AspectRetriever aspectRetriever) { + EntityRegistry entityRegistry = aspectRetriever.getEntityRegistry(); + EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); + validator.setCurrentEntitySpec(entitySpec); + Consumer resultFunction = + validationResult -> { + throw new IllegalArgumentException( + "Invalid format for aspect: " + + entitySpec.getName() + + "\n Cause: " + + validationResult.getMessages()); + }; + + RecordTemplateValidator.validate( + EntityApiUtils.buildKeyAspect(entityRegistry, urn), resultFunction, validator); + + if (aspect != null) { + RecordTemplateValidator.validate(aspect, resultFunction, validator); + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationException.java 
b/metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationException.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationException.java rename to metadata-io/metadata-io-api/src/main/java/com/linkedin/metadata/entity/validation/ValidationException.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java index 5413fb8382d9d..21bac3cbb0e61 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/aspect/utils/DefaultAspectsUtil.java @@ -20,8 +20,8 @@ import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.MCPItem; +import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -157,7 +157,7 @@ private static List> generateDefaultAspects( // Key Aspect final String keyAspectName = opContext.getKeyAspectName(urn); defaultAspects.add( - Pair.of(keyAspectName, EntityUtils.buildKeyAspect(opContext.getEntityRegistry(), urn))); + Pair.of(keyAspectName, EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), urn))); // Other Aspects defaultAspects.addAll( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index 5006788fa9d76..ec25a2fee76d5 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -7,6 +7,7 @@ import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Iterators; import com.linkedin.aspect.GetTimeseriesAspectValuesResponse; import com.linkedin.common.AuditStamp; import com.linkedin.common.VersionedUrn; @@ -59,6 +60,8 @@ import java.net.URISyntaxException; import java.time.Clock; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -90,6 +93,7 @@ public class JavaEntityClient implements EntityClient { private final TimeseriesAspectService timeseriesAspectService; private final RollbackService rollbackService; private final EventProducer eventProducer; + private final int batchGetV2Size; @Override @Nullable @@ -121,7 +125,22 @@ public Map batchGetV2( throws RemoteInvocationException, URISyntaxException { final Set projectedAspects = aspectNames == null ? opContext.getEntityAspectNames(entityName) : aspectNames; - return entityService.getEntitiesV2(opContext, entityName, urns, projectedAspects); + + Map responseMap = new HashMap<>(); + + Iterators.partition(urns.iterator(), Math.max(1, batchGetV2Size)) + .forEachRemaining( + batch -> { + try { + responseMap.putAll( + entityService.getEntitiesV2( + opContext, entityName, new HashSet<>(batch), projectedAspects)); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + }); + + return responseMap; } @Override @@ -130,11 +149,25 @@ public Map batchGetVersionedV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Set versionedUrns, - @Nullable final Set aspectNames) - throws RemoteInvocationException, URISyntaxException { + @Nullable final Set aspectNames) { final Set projectedAspects = aspectNames == null ? 
opContext.getEntityAspectNames(entityName) : aspectNames; - return entityService.getEntitiesVersionedV2(opContext, versionedUrns, projectedAspects); + + Map responseMap = new HashMap<>(); + + Iterators.partition(versionedUrns.iterator(), Math.max(1, batchGetV2Size)) + .forEachRemaining( + batch -> { + try { + responseMap.putAll( + entityService.getEntitiesVersionedV2( + opContext, new HashSet<>(batch), projectedAspects)); + } catch (URISyntaxException e) { + throw new RuntimeException(e); + } + }); + + return responseMap; } @Override diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java index deaf3e835615a..ab68abc69bce7 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/SystemJavaEntityClient.java @@ -42,7 +42,8 @@ public SystemJavaEntityClient( TimeseriesAspectService timeseriesAspectService, RollbackService rollbackService, EventProducer eventProducer, - EntityClientCacheConfig cacheConfig) { + EntityClientCacheConfig cacheConfig, + int batchGetV2Size) { super( entityService, deleteEntityService, @@ -52,7 +53,8 @@ public SystemJavaEntityClient( lineageSearchService, timeseriesAspectService, rollbackService, - eventProducer); + eventProducer, + batchGetV2Size); this.operationContextMap = CacheBuilder.newBuilder().maximumSize(500).build(); this.entityClientCache = buildEntityClientCache(SystemJavaEntityClient.class, cacheConfig); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/connection/ConnectionService.java b/metadata-io/src/main/java/com/linkedin/metadata/connection/ConnectionService.java new file mode 100644 index 0000000000000..f044ea52a251a --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/connection/ConnectionService.java @@ -0,0 +1,129 @@ +package com.linkedin.metadata.connection; + +import 
com.google.common.collect.ImmutableSet; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.urn.Urn; +import com.linkedin.connection.DataHubConnectionDetails; +import com.linkedin.connection.DataHubConnectionDetailsType; +import com.linkedin.connection.DataHubJsonConnection; +import com.linkedin.data.template.SetMode; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.AspectUtils; +import com.linkedin.metadata.key.DataHubConnectionKey; +import com.linkedin.metadata.utils.EntityKeyUtils; +import com.linkedin.mxe.MetadataChangeProposal; +import io.datahubproject.metadata.context.OperationContext; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.UUID; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@RequiredArgsConstructor +public class ConnectionService { + + private final EntityClient _entityClient; + + /** + * Upserts a DataHub connection. If the connection with the provided ID already exists, then it + * will be overwritten. + * + *

This method assumes that authorization has already been verified at the calling layer. + * + * @return the URN of the new connection. + */ + public Urn upsertConnection( + @Nonnull OperationContext opContext, + @Nullable final String id, + @Nonnull final Urn platformUrn, + @Nonnull final DataHubConnectionDetailsType type, + @Nullable final DataHubJsonConnection json, + @Nullable final String name) { + Objects.requireNonNull(platformUrn, "platformUrn must not be null"); + Objects.requireNonNull(type, "type must not be null"); + Objects.requireNonNull(opContext, "opContext must not be null"); + + // 1. Optionally generate new connection id + final String connectionId = id != null ? id : UUID.randomUUID().toString(); + final DataHubConnectionKey key = new DataHubConnectionKey().setId(connectionId); + final Urn connectionUrn = + EntityKeyUtils.convertEntityKeyToUrn(key, Constants.DATAHUB_CONNECTION_ENTITY_NAME); + + // 2. Build Connection Details + final DataHubConnectionDetails details = new DataHubConnectionDetails(); + details.setType(type); + // default set name as ID if it exists, otherwise use name if it exists + details.setName(id, SetMode.IGNORE_NULL); + details.setName(name, SetMode.IGNORE_NULL); + + if (DataHubConnectionDetailsType.JSON.equals(details.getType())) { + if (json != null) { + details.setJson(json); + } else { + throw new IllegalArgumentException( + "Connections with type JSON must provide the field 'json'."); + } + } + + // 3. Build platform instance + final DataPlatformInstance platformInstance = new DataPlatformInstance(); + platformInstance.setPlatform(platformUrn); + + // 4. 
Write changes to GMS + try { + final List aspectsToIngest = new ArrayList<>(); + aspectsToIngest.add( + AspectUtils.buildMetadataChangeProposal( + connectionUrn, Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME, details)); + aspectsToIngest.add( + AspectUtils.buildMetadataChangeProposal( + connectionUrn, Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME, platformInstance)); + _entityClient.batchIngestProposals(opContext, aspectsToIngest, false); + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to upsert Connection with urn %s", connectionUrn), e); + } + return connectionUrn; + } + + @Nullable + public DataHubConnectionDetails getConnectionDetails( + @Nonnull OperationContext opContext, @Nonnull final Urn connectionUrn) { + Objects.requireNonNull(connectionUrn, "connectionUrn must not be null"); + final EntityResponse response = getConnectionEntityResponse(opContext, connectionUrn); + if (response != null + && response.getAspects().containsKey(Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME)) { + return new DataHubConnectionDetails( + response + .getAspects() + .get(Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME) + .getValue() + .data()); + } + // No aspect found + return null; + } + + @Nullable + public EntityResponse getConnectionEntityResponse( + @Nonnull OperationContext opContext, @Nonnull final Urn connectionUrn) { + try { + return _entityClient.getV2( + opContext, + Constants.DATAHUB_CONNECTION_ENTITY_NAME, + connectionUrn, + ImmutableSet.of( + Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME, + Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME)); + } catch (Exception e) { + throw new RuntimeException( + String.format("Failed to retrieve Connection with urn %s", connectionUrn), e); + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java index e836b69ef4305..646b995f87d00 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java @@ -3,6 +3,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; +import com.linkedin.metadata.entity.ebean.PartitionedStream; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.ebean.Transaction; @@ -105,7 +106,7 @@ ListResult listUrns( Integer countAspect(@Nonnull final String aspectName, @Nullable String urnLike); @Nonnull - Stream> streamAspectBatches(final RestoreIndicesArgs args); + PartitionedStream streamAspectBatches(final RestoreIndicesArgs args); @Nonnull Stream streamAspects(String entityName, String aspectName); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspectIdentifier.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspectIdentifier.java index 887bd3910310d..e4b12c706ce28 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspectIdentifier.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspectIdentifier.java @@ -27,4 +27,14 @@ public static EntityAspectIdentifier fromCassandra(CassandraAspect cassandraAspe return new EntityAspectIdentifier( cassandraAspect.getUrn(), cassandraAspect.getAspect(), cassandraAspect.getVersion()); } + + public static EntityAspectIdentifier fromEntityAspect(EntityAspect entityAspect) { + return new EntityAspectIdentifier( + entityAspect.getUrn(), entityAspect.getAspect(), entityAspect.getVersion()); + } + + public static EntityAspectIdentifier fromSystemEntityAspect( + EntityAspect.EntitySystemAspect systemAspect) { + return fromEntityAspect(systemAspect.getEntityAspect()); + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java 
b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index ef4724d4a4094..353b83726611e 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -48,6 +48,8 @@ import com.linkedin.metadata.aspect.plugins.validation.ValidationExceptionCollection; import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.config.PreProcessHooks; +import com.linkedin.metadata.entity.ebean.EbeanAspectV2; +import com.linkedin.metadata.entity.ebean.PartitionedStream; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.ebean.batch.DeleteItemImpl; @@ -227,7 +229,7 @@ public Map> getLatestAspects( .forEach( key -> { final RecordTemplate keyAspect = - EntityUtils.buildKeyAspect(opContext.getEntityRegistry(), key); + EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), key); urnToAspects.get(key).add(keyAspect); }); @@ -666,14 +668,8 @@ public List ingestAspects( return Collections.emptyList(); } - log.info("Ingesting aspects batch to database: {}", aspectsBatch.toAbbreviatedString(2048)); - Timer.Context ingestToLocalDBTimer = - MetricUtils.timer(this.getClass(), "ingestAspectsToLocalDB").time(); List ingestResults = ingestAspectsToLocalDB(opContext, aspectsBatch, overwrite); - long took = ingestToLocalDBTimer.stop(); - log.info( - "Ingestion of aspects batch to database took {} ms", TimeUnit.NANOSECONDS.toMillis(took)); List mclResults = emitMCL(opContext, ingestResults, emitMCL); return mclResults; @@ -778,7 +774,17 @@ private List ingestAspectsToLocalDB( throw new ValidationException(exceptions.toString()); } + // No changes, return + if (changeMCPs.isEmpty()) { + return Collections.emptyList(); + } + // Database Upsert results + log.info( + "Ingesting aspects batch to database: {}", + 
AspectsBatch.toAbbreviatedString(changeMCPs, 2048)); + Timer.Context ingestToLocalDBTimer = + MetricUtils.timer(this.getClass(), "ingestAspectsToLocalDB").time(); List upsertResults = changeMCPs.stream() .map( @@ -827,6 +833,10 @@ private List ingestAspectsToLocalDB( if (tx != null) { tx.commitAndContinue(); } + long took = ingestToLocalDBTimer.stop(); + log.info( + "Ingestion of aspects batch to database took {} ms", + TimeUnit.NANOSECONDS.toMillis(took)); // Retention optimization and tx if (retentionService != null) { @@ -1046,7 +1056,7 @@ private Stream ingestTimeseriesProposal( .auditStamp(item.getAuditStamp()) .systemMetadata(item.getSystemMetadata()) .recordTemplate( - EntityUtils.buildKeyAspect( + EntityApiUtils.buildKeyAspect( opContext.getEntityRegistry(), item.getUrn())) .build(opContext.getRetrieverContext().get().getAspectRetriever())) .collect(Collectors.toList()); @@ -1240,7 +1250,7 @@ public Integer getCountAspect( @Nonnull @Override - public Stream streamRestoreIndices( + public List restoreIndices( @Nonnull OperationContext opContext, @Nonnull RestoreIndicesArgs args, @Nonnull Consumer logger) { @@ -1249,32 +1259,35 @@ public Stream streamRestoreIndices( logger.accept( String.format( "Reading rows %s through %s (0 == infinite) in batches of %s from the aspects table started.", - args.start, args.limit, args.batchSize)); + args.start, args.start + args.limit, args.batchSize)); long startTime = System.currentTimeMillis(); - return aspectDao - .streamAspectBatches(args) - .map( - batchStream -> { - long timeSqlQueryMs = System.currentTimeMillis() - startTime; - - List systemAspects = - EntityUtils.toSystemAspectFromEbeanAspects( - opContext.getRetrieverContext().get(), - batchStream.collect(Collectors.toList())); - - RestoreIndicesResult result = restoreIndices(opContext, systemAspects, logger); - result.timeSqlQueryMs = timeSqlQueryMs; - logger.accept("Batch completed."); - try { - TimeUnit.MILLISECONDS.sleep(args.batchDelayMs); - } catch 
(InterruptedException e) { - throw new RuntimeException( - "Thread interrupted while sleeping after successful batch migration."); - } - return result; - }); + try (PartitionedStream stream = aspectDao.streamAspectBatches(args)) { + return stream + .partition(args.batchSize) + .map( + batch -> { + long timeSqlQueryMs = System.currentTimeMillis() - startTime; + + List systemAspects = + EntityUtils.toSystemAspectFromEbeanAspects( + opContext.getRetrieverContext().get(), batch.collect(Collectors.toList())); + + RestoreIndicesResult result = restoreIndices(opContext, systemAspects, logger); + result.timeSqlQueryMs = timeSqlQueryMs; + + logger.accept("Batch completed."); + try { + TimeUnit.MILLISECONDS.sleep(args.batchDelayMs); + } catch (InterruptedException e) { + throw new RuntimeException( + "Thread interrupted while sleeping after successful batch migration."); + } + return result; + }) + .collect(Collectors.toList()); + } } @Nonnull @@ -1438,7 +1451,7 @@ private RestoreIndicesResult restoreIndices( .aspectSpec(entitySpec.getKeyAspectSpec()) .auditStamp(auditStamp) .systemMetadata(latestSystemMetadata) - .recordTemplate(EntityUtils.buildKeyAspect(opContext.getEntityRegistry(), urn)) + .recordTemplate(EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), urn)) .build(opContext.getRetrieverContext().get().getAspectRetriever())); Stream defaultAspectsResult = ingestProposalSync( @@ -2278,7 +2291,8 @@ private Map getEnvelopedAspects( .collect( Collectors.toMap( systemAspect -> - ((EntityAspect.EntitySystemAspect) systemAspect).getAspectIdentifier(), + EntityAspectIdentifier.fromSystemEntityAspect( + (EntityAspect.EntitySystemAspect) systemAspect), systemAspect -> ((EntityAspect.EntitySystemAspect) systemAspect).toEnvelopedAspects())); } @@ -2334,13 +2348,13 @@ private UpdateAspectResult ingestAspectToLocalDB( // 4. 
Save the newValue as the latest version log.debug("Ingesting aspect with name {}, urn {}", aspectName, urn); - String newValueStr = EntityUtils.toJsonAspect(newValue); + String newValueStr = EntityApiUtils.toJsonAspect(newValue); long versionOfOld = aspectDao.saveLatestAspect( tx, urn.toString(), aspectName, - latest == null ? null : EntityUtils.toJsonAspect(oldValue), + latest == null ? null : EntityApiUtils.toJsonAspect(oldValue), latest == null ? null : latest.getCreatedBy(), latest == null ? null : latest.getEntityAspect().getCreatedFor(), latest == null ? null : latest.getCreatedOn(), @@ -2349,7 +2363,7 @@ private UpdateAspectResult ingestAspectToLocalDB( auditStamp.getActor().toString(), auditStamp.hasImpersonator() ? auditStamp.getImpersonator().toString() : null, new Timestamp(auditStamp.getTime()), - EntityUtils.toJsonAspect(providedSystemMetadata), + EntityApiUtils.toJsonAspect(providedSystemMetadata), nextVersion); // metrics diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java index 701cde1b4ef8a..e542b10af4ddc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java @@ -1,7 +1,6 @@ package com.linkedin.metadata.entity; import static com.linkedin.metadata.Constants.*; -import static com.linkedin.metadata.utils.PegasusUtils.urnToEntityName; import com.datahub.util.RecordUtils; import com.google.common.base.Preconditions; @@ -14,7 +13,6 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; -import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.aspect.ReadItem; import com.linkedin.metadata.aspect.RetrieverContext; import com.linkedin.metadata.aspect.SystemAspect; @@ -27,10 +25,8 @@ import com.linkedin.metadata.models.registry.EntityRegistry; 
import com.linkedin.metadata.snapshot.Snapshot; import com.linkedin.metadata.utils.EntityKeyUtils; -import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.metadata.utils.PegasusUtils; import com.linkedin.mxe.MetadataChangeProposal; -import com.linkedin.mxe.SystemMetadata; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; @@ -48,11 +44,6 @@ public class EntityUtils { private EntityUtils() {} - @Nonnull - public static String toJsonAspect(@Nonnull final RecordTemplate aspectRecord) { - return RecordUtils.toJsonString(aspectRecord); - } - @Nullable public static Urn getUrnFromString(String urnStr) { try { @@ -120,13 +111,6 @@ public static RecordTemplate getAspectFromEntity( } } - public static RecordTemplate buildKeyAspect( - @Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { - final EntitySpec spec = entityRegistry.getEntitySpec(urnToEntityName(urn)); - final AspectSpec keySpec = spec.getKeyAspectSpec(); - return EntityKeyUtils.convertUrnToEntityKey(urn, keySpec); - } - static Entity toEntity(@Nonnull final Snapshot snapshot) { return new Entity().setValue(snapshot); } @@ -163,7 +147,7 @@ static EntityResponse toEntityResponse( final Urn urn, final List envelopedAspects) { final EntityResponse response = new EntityResponse(); response.setUrn(urn); - response.setEntityName(urnToEntityName(urn)); + response.setEntityName(PegasusUtils.urnToEntityName(urn)); response.setAspects( new EnvelopedAspectMap( envelopedAspects.stream() @@ -181,7 +165,7 @@ static EntityResponse toEntityResponse( public static Optional toSystemAspect( @Nonnull RetrieverContext retrieverContext, @Nullable EntityAspect entityAspect) { return Optional.ofNullable(entityAspect) - .map(aspect -> EntityUtils.toSystemAspects(retrieverContext, List.of(aspect))) + .map(aspect -> toSystemAspects(retrieverContext, List.of(aspect))) .filter(systemAspects -> !systemAspects.isEmpty()) 
.map(systemAspects -> systemAspects.get(0)); } @@ -294,27 +278,4 @@ public static List toSystemAspects( return systemAspects; } - - public static MetadataChangeProposal buildMCP( - Urn entityUrn, String aspectName, ChangeType changeType, @Nullable T aspect) { - MetadataChangeProposal proposal = new MetadataChangeProposal(); - proposal.setEntityUrn(entityUrn); - proposal.setChangeType(changeType); - proposal.setEntityType(entityUrn.getEntityType()); - proposal.setAspectName(aspectName); - if (aspect != null) { - proposal.setAspect(GenericRecordUtils.serializeAspect(aspect)); - } - return proposal; - } - - public static SystemMetadata parseSystemMetadata(String jsonSystemMetadata) { - if (jsonSystemMetadata == null || jsonSystemMetadata.equals("")) { - SystemMetadata response = new SystemMetadata(); - response.setRunId(DEFAULT_RUN_ID); - response.setLastObserved(0); - return response; - } - return RecordUtils.toRecordTemplate(SystemMetadata.class, jsonSystemMetadata); - } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java index 71b9b9ad86f72..15c37b6c0085f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/cassandra/CassandraAspectDao.java @@ -30,6 +30,7 @@ import com.linkedin.metadata.entity.EntityAspectIdentifier; import com.linkedin.metadata.entity.ListResult; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; +import com.linkedin.metadata.entity.ebean.PartitionedStream; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.query.ExtraInfo; import com.linkedin.metadata.query.ExtraInfoArray; @@ -200,7 +201,7 @@ public Map batchGet( return keys.stream() .map(this::getAspect) .filter(Objects::nonNull) - .collect(Collectors.toMap(EntityAspect::getAspectIdentifier, 
aspect -> aspect)); + .collect(Collectors.toMap(EntityAspectIdentifier::fromEntityAspect, aspect -> aspect)); } @Override @@ -491,7 +492,7 @@ public Integer countAspect(@Nonnull String aspectName, @Nullable String urnLike) } @Nonnull - public Stream> streamAspectBatches(final RestoreIndicesArgs args) { + public PartitionedStream streamAspectBatches(final RestoreIndicesArgs args) { // Not implemented return null; } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java index 4d5d51cb0ce7b..9725abdf7fdc2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/EbeanAspectDao.java @@ -8,7 +8,6 @@ import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; -import com.google.common.collect.Iterators; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.RetrieverContext; @@ -49,7 +48,6 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; -import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -61,7 +59,6 @@ import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; -import java.util.stream.StreamSupport; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.persistence.PersistenceException; @@ -497,9 +494,15 @@ public Integer countAspect(@Nonnull String aspectName, @Nullable String urnLike) return exp.findCount(); } + /** + * Warning this inner Streams must be closed + * + * @param args + * @return + */ @Nonnull @Override - public Stream> streamAspectBatches(final RestoreIndicesArgs args) { + public PartitionedStream streamAspectBatches(final RestoreIndicesArgs args) 
{ ExpressionList exp = _server .find(EbeanAspectV2.class) @@ -548,25 +551,24 @@ public Stream> streamAspectBatches(final RestoreIndicesArg exp = exp.setMaxRows(args.limit); } - return partition( - exp.orderBy() - .asc(EbeanAspectV2.URN_COLUMN) - .orderBy() - .asc(EbeanAspectV2.ASPECT_COLUMN) - .setFirstRow(start) - .findStream(), - args.batchSize); - } - - private static Stream> partition(Stream source, int size) { - final Iterator it = source.iterator(); - final Iterator> partIt = - Iterators.transform(Iterators.partition(it, size), List::stream); - final Iterable> iterable = () -> partIt; - - return StreamSupport.stream(iterable.spliterator(), false); + return PartitionedStream.builder() + .delegateStream( + exp.orderBy() + .asc(EbeanAspectV2.URN_COLUMN) + .orderBy() + .asc(EbeanAspectV2.ASPECT_COLUMN) + .setFirstRow(start) + .findStream()) + .build(); } + /** + * Warning the stream must be closed + * + * @param entityName + * @param aspectName + * @return + */ @Override @Nonnull public Stream streamAspects(String entityName, String aspectName) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/PartitionedStream.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/PartitionedStream.java new file mode 100644 index 0000000000000..1b7a856fb9729 --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/ebean/PartitionedStream.java @@ -0,0 +1,29 @@ +package com.linkedin.metadata.entity.ebean; + +import com.google.common.collect.Iterators; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; +import javax.annotation.Nonnull; +import lombok.Builder; +import lombok.experimental.Accessors; + +@Builder +@Accessors(fluent = true) +public class PartitionedStream implements AutoCloseable { + @Nonnull private final Stream delegateStream; + + public Stream> partition(int size) { + final Iterator it = delegateStream.iterator(); + final Iterator> partIt 
= + Iterators.transform(Iterators.partition(it, size), List::stream); + final Iterable> iterable = () -> partIt; + return StreamSupport.stream(iterable.spliterator(), false); + } + + @Override + public void close() { + delegateStream.close(); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java index 1cb36568feacc..ddcc6b6599231 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/validation/ValidationUtils.java @@ -3,21 +3,14 @@ import com.codahale.metrics.Timer; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; -import com.linkedin.data.schema.validation.ValidationResult; import com.linkedin.data.template.AbstractArrayTemplate; -import com.linkedin.data.template.RecordTemplate; -import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.browse.BrowseResult; import com.linkedin.metadata.browse.BrowseResultEntity; import com.linkedin.metadata.browse.BrowseResultEntityArray; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.graph.EntityLineageResult; import com.linkedin.metadata.graph.LineageRelationship; import com.linkedin.metadata.graph.LineageRelationshipArray; -import com.linkedin.metadata.models.AspectSpec; -import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.ListResult; import com.linkedin.metadata.search.LineageScrollResult; import com.linkedin.metadata.search.LineageSearchEntity; @@ -29,11 +22,8 @@ import com.linkedin.metadata.search.SearchResult; import com.linkedin.metadata.utils.metrics.MetricUtils; import io.datahubproject.metadata.context.OperationContext; -import 
java.net.URISyntaxException; -import java.net.URLEncoder; import java.util.Objects; import java.util.Set; -import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -43,110 +33,6 @@ @Slf4j public class ValidationUtils { - public static final int URN_NUM_BYTES_LIMIT = 512; - public static final String URN_DELIMITER_SEPARATOR = "␟"; - - /** - * Validates a {@link RecordTemplate} and throws {@link ValidationException} if validation fails. - * - * @param record record to be validated. - */ - public static void validateOrThrow(RecordTemplate record) { - RecordTemplateValidator.validate( - record, - validationResult -> { - throw new ValidationException( - String.format( - "Failed to validate record with class %s: %s", - record.getClass().getName(), validationResult.getMessages().toString())); - }); - } - - /** - * Validates a {@link RecordTemplate} and logs a warning if validation fails. - * - * @param record record to be validated.ailure. 
- */ - public static void validateOrWarn(RecordTemplate record) { - RecordTemplateValidator.validate( - record, - validationResult -> { - log.warn(String.format("Failed to validate record %s against its schema.", record)); - }); - } - - public static AspectSpec validate(EntitySpec entitySpec, String aspectName) { - if (aspectName == null || aspectName.isEmpty()) { - throw new UnsupportedOperationException( - "Aspect name is required for create and update operations"); - } - - AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); - - if (aspectSpec == null) { - throw new RuntimeException( - String.format("Unknown aspect %s for entity %s", aspectName, entitySpec.getName())); - } - - return aspectSpec; - } - - public static void validateRecordTemplate( - EntitySpec entitySpec, - Urn urn, - @Nullable RecordTemplate aspect, - @Nonnull AspectRetriever aspectRetriever) { - EntityRegistry entityRegistry = aspectRetriever.getEntityRegistry(); - EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); - validator.setCurrentEntitySpec(entitySpec); - Consumer resultFunction = - validationResult -> { - throw new IllegalArgumentException( - "Invalid format for aspect: " - + entitySpec.getName() - + "\n Cause: " - + validationResult.getMessages()); - }; - - RecordTemplateValidator.validate( - EntityUtils.buildKeyAspect(entityRegistry, urn), resultFunction, validator); - - if (aspect != null) { - RecordTemplateValidator.validate(aspect, resultFunction, validator); - } - } - - public static void validateUrn(@Nonnull EntityRegistry entityRegistry, @Nonnull final Urn urn) { - EntityRegistryUrnValidator validator = new EntityRegistryUrnValidator(entityRegistry); - validator.setCurrentEntitySpec(entityRegistry.getEntitySpec(urn.getEntityType())); - RecordTemplateValidator.validate( - EntityUtils.buildKeyAspect(entityRegistry, urn), - validationResult -> { - throw new IllegalArgumentException( - "Invalid urn: " + urn + "\n Cause: " + 
validationResult.getMessages()); - }, - validator); - - if (urn.toString().trim().length() != urn.toString().length()) { - throw new IllegalArgumentException( - "Error: cannot provide an URN with leading or trailing whitespace"); - } - if (URLEncoder.encode(urn.toString()).length() > URN_NUM_BYTES_LIMIT) { - throw new IllegalArgumentException( - "Error: cannot provide an URN longer than " - + Integer.toString(URN_NUM_BYTES_LIMIT) - + " bytes (when URL encoded)"); - } - if (urn.toString().contains(URN_DELIMITER_SEPARATOR)) { - throw new IllegalArgumentException( - "Error: URN cannot contain " + URN_DELIMITER_SEPARATOR + " character"); - } - try { - Urn.createFromString(urn.toString()); - } catch (URISyntaxException e) { - throw new IllegalArgumentException(e); - } - } public static SearchResult validateSearchResult( @Nonnull OperationContext opContext, @@ -407,7 +293,7 @@ private static Stream validateSearchUrns( .filter( urn -> { try { - validateUrn(opContext.getEntityRegistry(), urn); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), urn); return true; } catch (Exception e) { log.warn( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index e2806f093f77e..a2c8070ea21a3 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -10,6 +10,7 @@ import com.linkedin.common.UrnArrayArray; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.RelatedEntities; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.aspect.models.graph.RelatedEntity; import com.linkedin.metadata.graph.EntityLineageResult; @@ -28,6 +29,7 @@ import 
com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.query.filter.SortCriterion; +import com.linkedin.metadata.search.elasticsearch.query.request.SearchAfterWrapper; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.util.Pair; import io.opentelemetry.extension.annotations.WithSpan; @@ -38,6 +40,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Optional; import java.util.Set; import java.util.StringJoiner; @@ -905,6 +908,99 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( int count, @Nullable Long startTimeMillis, @Nullable Long endTimeMillis) { - throw new IllegalArgumentException("Not implemented"); + + if (sourceTypes != null && sourceTypes.isEmpty() + || destinationTypes != null && destinationTypes.isEmpty()) { + return new RelatedEntitiesScrollResult(0, 0, null, Collections.emptyList()); + } + + final String srcCriteria = filterToCriteria(sourceEntityFilter).trim(); + final String destCriteria = filterToCriteria(destinationEntityFilter).trim(); + final String edgeCriteria = relationshipFilterToCriteria(relationshipFilter); + + final RelationshipDirection relationshipDirection = relationshipFilter.getDirection(); + String srcNodeLabel = ""; + // Create a URN from the String. 
Only proceed if srcCriteria is not null or empty + if (srcCriteria != null && !srcCriteria.isEmpty()) { + final String urnValue = + sourceEntityFilter.getOr().get(0).getAnd().get(0).getValue().toString(); + try { + final Urn urn = Urn.createFromString(urnValue); + srcNodeLabel = urn.getEntityType(); + } catch (URISyntaxException e) { + log.error("Failed to parse URN: {} ", urnValue, e); + } + } + String matchTemplate = "MATCH (src:%s %s)-[r%s %s]-(dest %s)%s"; + if (relationshipDirection == RelationshipDirection.INCOMING) { + matchTemplate = "MATCH (src:%s %s)<-[r%s %s]-(dest %s)%s"; + } else if (relationshipDirection == RelationshipDirection.OUTGOING) { + matchTemplate = "MATCH (src:%s %s)-[r%s %s]->(dest %s)%s"; + } + + final String returnNodes = + String.format( + "RETURN dest, src, type(r)"); // Return both related entity and the relationship type. + final String returnCount = "RETURN count(*)"; // For getting the total results. + + String relationshipTypeFilter = ""; + if (!relationshipTypes.isEmpty()) { + relationshipTypeFilter = ":" + StringUtils.join(relationshipTypes, "|"); + } + + String whereClause = computeEntityTypeWhereClause(sourceTypes, destinationTypes); + + // Build Statement strings + String baseStatementString = + String.format( + matchTemplate, + srcNodeLabel, + srcCriteria, + relationshipTypeFilter, + edgeCriteria, + destCriteria, + whereClause); + + log.info(baseStatementString); + + final String resultStatementString = + String.format("%s %s SKIP $offset LIMIT $count", baseStatementString, returnNodes); + final String countStatementString = String.format("%s %s", baseStatementString, returnCount); + + int offset = 0; + if (Objects.nonNull(scrollId)) { + offset = Integer.valueOf(SearchAfterWrapper.fromScrollId(scrollId).getPitId().toString()); + } + + // Build Statements + final Statement resultStatement = + new Statement(resultStatementString, ImmutableMap.of("offset", offset, "count", count)); + final Statement countStatement = new 
Statement(countStatementString, Collections.emptyMap()); + + // Execute Queries + final List relatedEntities = + runQuery(resultStatement) + .list( + record -> + new RelatedEntities( + record.values().get(2).asString(), // Relationship Type + record.values().get(0).asNode().get("urn").asString(), + record.values().get(1).asNode().get("urn").asString(), + relationshipDirection, + null)); + final int totalCount = runQuery(countStatement).single().get(0).asInt(); + log.info("Total Related Entities: {}", totalCount); + // return new RelatedEntitiesResult(0, relatedEntities.size(), totalCount, relatedEntities); + String nextScrollId = null; + if (relatedEntities.size() == count) { + String pitId = Integer.toString(offset + count); + nextScrollId = new SearchAfterWrapper(null, pitId, 0L).toScrollId(); + } + return RelatedEntitiesScrollResult.builder() + .entities(relatedEntities) + .pageSize(relatedEntities.size()) + .numResults(totalCount) + .scrollId(nextScrollId) + .build(); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index d6bb1fb2401e8..d8c5c3317a2ec 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -336,7 +336,9 @@ public AutoCompleteResult autoComplete( IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention(); AutocompleteRequestHandler builder = AutocompleteRequestHandler.getBuilder( - entitySpec, opContext.getRetrieverContext().get().getAspectRetriever()); + entitySpec, + customSearchConfiguration, + opContext.getRetrieverContext().get().getAspectRetriever()); SearchRequest req = builder.getSearchRequest( opContext, diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java index 0acedc5d49171..62525bdd35b3f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AutocompleteRequestHandler.java @@ -4,10 +4,14 @@ import static com.linkedin.metadata.search.utils.ESAccessControlUtil.restrictUrn; import static com.linkedin.metadata.search.utils.ESUtils.applyDefaultSearchFilters; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; import com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.config.search.custom.QueryConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; @@ -18,9 +22,9 @@ import com.linkedin.metadata.search.utils.ESUtils; import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; +import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Optional; @@ -35,7 +39,9 @@ import org.opensearch.action.search.SearchResponse; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.MultiMatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import 
org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.opensearch.search.SearchHit; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; @@ -51,9 +57,17 @@ public class AutocompleteRequestHandler { private final AspectRetriever aspectRetriever; + private final CustomizedQueryHandler customizedQueryHandler; + + private final EntitySpec entitySpec; + public AutocompleteRequestHandler( - @Nonnull EntitySpec entitySpec, @Nonnull AspectRetriever aspectRetriever) { + @Nonnull EntitySpec entitySpec, + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull AspectRetriever aspectRetriever) { + this.entitySpec = entitySpec; List fieldSpecs = entitySpec.getSearchableFieldSpecs(); + this.customizedQueryHandler = CustomizedQueryHandler.builder(customSearchConfiguration).build(); _defaultAutocompleteFields = Stream.concat( fieldSpecs.stream() @@ -80,9 +94,13 @@ public AutocompleteRequestHandler( } public static AutocompleteRequestHandler getBuilder( - @Nonnull EntitySpec entitySpec, @Nonnull AspectRetriever aspectRetriever) { + @Nonnull EntitySpec entitySpec, + @Nullable CustomSearchConfiguration customSearchConfiguration, + @Nonnull AspectRetriever aspectRetriever) { return AUTOCOMPLETE_QUERY_BUILDER_BY_ENTITY_NAME.computeIfAbsent( - entitySpec, k -> new AutocompleteRequestHandler(entitySpec, aspectRetriever)); + entitySpec, + k -> + new AutocompleteRequestHandler(entitySpec, customSearchConfiguration, aspectRetriever)); } public SearchRequest getSearchRequest( @@ -94,24 +112,90 @@ public SearchRequest getSearchRequest( SearchRequest searchRequest = new SearchRequest(); SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.size(limit); - // apply default filters - BoolQueryBuilder boolQueryBuilder = - applyDefaultSearchFilters(opContext, filter, getQuery(input, field)); - searchSourceBuilder.query(boolQueryBuilder); - 
searchSourceBuilder.postFilter( - ESUtils.buildFilterQuery(filter, false, searchableFieldTypes, aspectRetriever)); + AutocompleteConfiguration customAutocompleteConfig = + customizedQueryHandler.lookupAutocompleteConfig(input).orElse(null); + QueryConfiguration customQueryConfig = + customizedQueryHandler.lookupQueryConfig(input).orElse(null); + + // Initial query with input filters + BoolQueryBuilder baseQuery = + ESUtils.buildFilterQuery(filter, false, searchableFieldTypes, aspectRetriever); + + // Add autocomplete query + baseQuery.should(getQuery(opContext.getObjectMapper(), customAutocompleteConfig, input, field)); + + // Apply default filters + BoolQueryBuilder queryWithDefaultFilters = + applyDefaultSearchFilters(opContext, filter, baseQuery); + + // Apply scoring + FunctionScoreQueryBuilder functionScoreQueryBuilder = + Optional.ofNullable(customAutocompleteConfig) + .flatMap( + cac -> + CustomizedQueryHandler.functionScoreQueryBuilder( + opContext.getObjectMapper(), + cac, + queryWithDefaultFilters, + customQueryConfig)) + .orElse( + SearchQueryBuilder.buildScoreFunctions( + opContext, customQueryConfig, List.of(entitySpec), queryWithDefaultFilters)); + searchSourceBuilder.query(functionScoreQueryBuilder); + + ESUtils.buildSortOrder(searchSourceBuilder, null, List.of(entitySpec)); + + // wire inner non-scored query searchSourceBuilder.highlighter(getHighlights(field)); searchRequest.source(searchSourceBuilder); return searchRequest; } - private BoolQueryBuilder getQuery(@Nonnull String query, @Nullable String field) { - return getQuery(getAutocompleteFields(field), query); + private BoolQueryBuilder getQuery( + @Nonnull ObjectMapper objectMapper, + @Nullable AutocompleteConfiguration customAutocompleteConfig, + @Nonnull String query, + @Nullable String field) { + return getQuery(objectMapper, customAutocompleteConfig, getAutocompleteFields(field), query); + } + + public BoolQueryBuilder getQuery( + @Nonnull ObjectMapper objectMapper, + @Nullable 
AutocompleteConfiguration customAutocompleteConfig, + List autocompleteFields, + @Nonnull String query) { + + BoolQueryBuilder finalQuery = + Optional.ofNullable(customAutocompleteConfig) + .flatMap(cac -> CustomizedQueryHandler.boolQueryBuilder(objectMapper, cac, query)) + .orElse(QueryBuilders.boolQuery()) + .minimumShouldMatch(1); + + getAutocompleteQuery(customAutocompleteConfig, autocompleteFields, query) + .ifPresent(finalQuery::should); + + return finalQuery; + } + + private Optional getAutocompleteQuery( + @Nullable AutocompleteConfiguration customConfig, + List autocompleteFields, + @Nonnull String query) { + Optional result = Optional.empty(); + + if (customConfig == null || customConfig.isDefaultQuery()) { + result = Optional.of(defaultQuery(autocompleteFields, query)); + } + + return result; } - public static BoolQueryBuilder getQuery(List autocompleteFields, @Nonnull String query) { + private static BoolQueryBuilder defaultQuery( + List autocompleteFields, @Nonnull String query) { BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); + finalQuery.minimumShouldMatch(1); + // Search for exact matches with higher boost and ngram matches MultiMatchQueryBuilder autocompleteQueryBuilder = QueryBuilders.multiMatchQuery(query).type(MultiMatchQueryBuilder.Type.BOOL_PREFIX); @@ -154,6 +238,12 @@ private HighlightBuilder getHighlights(@Nullable String field) { .field(fieldName + ".*") .field(fieldName + ".ngram") .field(fieldName + ".delimited")); + + // set field match req false for ngram + highlightBuilder.fields().stream() + .filter(f -> f.name().contains("ngram")) + .forEach(f -> f.requireFieldMatch(false).noMatchSize(200)); + return highlightBuilder; } @@ -168,8 +258,9 @@ public AutoCompleteResult extractResult( @Nonnull OperationContext opContext, @Nonnull SearchResponse searchResponse, @Nonnull String input) { - Set results = new LinkedHashSet<>(); - Set entityResults = new HashSet<>(); + // use lists to preserve ranking + List results = new 
ArrayList<>(); + List entityResults = new ArrayList<>(); for (SearchHit hit : searchResponse.getHits()) { Optional matchedFieldValue = @@ -181,13 +272,15 @@ public AutoCompleteResult extractResult( if (matchedUrn.isPresent()) { Urn autoCompleteUrn = Urn.createFromString(matchedUrn.get()); if (!restrictUrn(opContext, autoCompleteUrn)) { - entityResults.add( - new AutoCompleteEntity().setUrn(Urn.createFromString(matchedUrn.get()))); - matchedFieldValue.ifPresent(results::add); + matchedFieldValue.ifPresent( + value -> { + entityResults.add(new AutoCompleteEntity().setUrn(autoCompleteUrn)); + results.add(value); + }); } } } catch (URISyntaxException e) { - throw new RuntimeException(String.format("Failed to create urn %s", matchedUrn.get()), e); + log.warn(String.format("Failed to create urn %s", matchedUrn.get())); } } return new AutoCompleteResult() diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java index 478d633fe3c55..0dbdf80860f7f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java @@ -1,26 +1,54 @@ package com.linkedin.metadata.search.elasticsearch.query.request; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; +import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; +import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import 
java.util.regex.Pattern; import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.Builder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.LoggingDeprecationHandler; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.opensearch.search.SearchModule; @Slf4j @Builder(builderMethodName = "hiddenBuilder") @Getter public class CustomizedQueryHandler { + private static final NamedXContentRegistry X_CONTENT_REGISTRY; + + static { + SearchModule searchModule = new SearchModule(Settings.EMPTY, Collections.emptyList()); + X_CONTENT_REGISTRY = new NamedXContentRegistry(searchModule.getNamedXContents()); + } + private CustomSearchConfiguration customSearchConfiguration; @Builder.Default private List> queryConfigurations = List.of(); + @Builder.Default + private List> autocompleteConfigurations = + List.of(); + public Optional lookupQueryConfig(String query) { return queryConfigurations.stream() .filter(e -> e.getKey().matcher(query).matches()) @@ -28,6 +56,129 @@ public Optional lookupQueryConfig(String query) { .findFirst(); } + public Optional lookupAutocompleteConfig(String query) { + return autocompleteConfigurations.stream() + .filter(e -> e.getKey().matcher(query).matches()) + .map(Map.Entry::getValue) + .findFirst(); + } + + public static String unquote(String query) { + return query.replaceAll("[\"']", ""); + } + + public static boolean isQuoted(String query) { + return Stream.of("\"", "'").anyMatch(query::contains); + } + + public static Optional boolQueryBuilder( + @Nonnull 
ObjectMapper objectMapper, + QueryConfiguration customQueryConfiguration, + String query) { + if (customQueryConfiguration.getBoolQuery() != null) { + log.debug( + "Using custom query configuration queryRegex: {}", + customQueryConfiguration.getQueryRegex()); + } + return Optional.ofNullable(customQueryConfiguration.getBoolQuery()) + .map(bq -> toBoolQueryBuilder(objectMapper, query, bq)); + } + + public static Optional boolQueryBuilder( + @Nonnull ObjectMapper objectMapper, + AutocompleteConfiguration customAutocompleteConfiguration, + String query) { + if (customAutocompleteConfiguration.getBoolQuery() != null) { + log.debug( + "Using custom query autocomplete queryRegex: {}", + customAutocompleteConfiguration.getQueryRegex()); + } + return Optional.ofNullable(customAutocompleteConfiguration.getBoolQuery()) + .map(bq -> toBoolQueryBuilder(objectMapper, query, bq)); + } + + private static BoolQueryBuilder toBoolQueryBuilder( + @Nonnull ObjectMapper objectMapper, String query, BoolQueryConfiguration boolQuery) { + try { + String jsonFragment = + objectMapper + .writeValueAsString(boolQuery) + .replace("\"{{query_string}}\"", objectMapper.writeValueAsString(query)) + .replace( + "\"{{unquoted_query_string}}\"", objectMapper.writeValueAsString(unquote(query))); + XContentParser parser = + XContentType.JSON + .xContent() + .createParser(X_CONTENT_REGISTRY, LoggingDeprecationHandler.INSTANCE, jsonFragment); + return BoolQueryBuilder.fromXContent(parser); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public static FunctionScoreQueryBuilder functionScoreQueryBuilder( + @Nonnull ObjectMapper objectMapper, + @Nonnull QueryConfiguration customQueryConfiguration, + QueryBuilder queryBuilder) { + return toFunctionScoreQueryBuilder( + objectMapper, queryBuilder, customQueryConfiguration.getFunctionScore()); + } + + public static Optional functionScoreQueryBuilder( + @Nonnull ObjectMapper objectMapper, + @Nonnull AutocompleteConfiguration 
customAutocompleteConfiguration, + QueryBuilder queryBuilder, + @Nullable QueryConfiguration customQueryConfiguration) { + + Optional result = Optional.empty(); + + if ((customAutocompleteConfiguration.getFunctionScore() == null + || customAutocompleteConfiguration.getFunctionScore().isEmpty()) + && customAutocompleteConfiguration.isInheritFunctionScore() + && customQueryConfiguration != null) { + log.debug( + "Inheriting query configuration for autocomplete function scoring: " + + customQueryConfiguration); + // inherit if not overridden + result = + Optional.of( + toFunctionScoreQueryBuilder( + objectMapper, queryBuilder, customQueryConfiguration.getFunctionScore())); + } else if (customAutocompleteConfiguration.getFunctionScore() != null + && !customAutocompleteConfiguration.getFunctionScore().isEmpty()) { + log.debug("Applying custom autocomplete function scores."); + result = + Optional.of( + toFunctionScoreQueryBuilder( + objectMapper, queryBuilder, customAutocompleteConfiguration.getFunctionScore())); + } + + return result; + } + + private static FunctionScoreQueryBuilder toFunctionScoreQueryBuilder( + @Nonnull ObjectMapper objectMapper, + @Nonnull QueryBuilder queryBuilder, + @Nonnull Map params) { + try { + HashMap body = new HashMap<>(params); + if (!body.isEmpty()) { + log.debug("Using custom scoring functions: {}", body); + } + + body.put("query", objectMapper.readValue(queryBuilder.toString(), Map.class)); + + String jsonFragment = objectMapper.writeValueAsString(Map.of("function_score", body)); + XContentParser parser = + XContentType.JSON + .xContent() + .createParser(X_CONTENT_REGISTRY, LoggingDeprecationHandler.INSTANCE, jsonFragment); + return (FunctionScoreQueryBuilder) FunctionScoreQueryBuilder.parseInnerQueryBuilder(parser); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + public static CustomizedQueryHandlerBuilder builder( @Nullable CustomSearchConfiguration customSearchConfiguration) { CustomizedQueryHandlerBuilder 
builder = @@ -38,7 +189,12 @@ public static CustomizedQueryHandlerBuilder builder( customSearchConfiguration.getQueryConfigurations().stream() .map(cfg -> Map.entry(Pattern.compile(cfg.getQueryRegex()), cfg)) .collect(Collectors.toList())); + builder.autocompleteConfigurations( + customSearchConfiguration.getAutocompleteConfigurations().stream() + .map(cfg -> Map.entry(Pattern.compile(cfg.getQueryRegex()), cfg)) + .collect(Collectors.toList())); } + return builder; } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index 048987ef5aa6a..33195d4ea807d 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -3,18 +3,14 @@ import static com.linkedin.metadata.Constants.SKIP_REFERENCE_ASPECT; import static com.linkedin.metadata.models.SearchableFieldSpecExtractor.PRIMARY_URN_SEARCH_PROPERTIES; import static com.linkedin.metadata.search.elasticsearch.indexbuilder.SettingsBuilder.*; -import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.*; +import static com.linkedin.metadata.search.elasticsearch.query.request.CustomizedQueryHandler.isQuoted; +import static com.linkedin.metadata.search.elasticsearch.query.request.CustomizedQueryHandler.unquote; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.core.StreamReadConstraints; -import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; -import com.linkedin.metadata.Constants; import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; import 
com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.WordGramConfiguration; -import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.config.search.custom.QueryConfiguration; import com.linkedin.metadata.models.AspectSpec; @@ -28,10 +24,8 @@ import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.utils.ESUtils; import io.datahubproject.metadata.context.OperationContext; -import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -39,18 +33,12 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; import lombok.extern.slf4j.Slf4j; import org.opensearch.common.lucene.search.function.CombineFunction; import org.opensearch.common.lucene.search.function.FieldValueFactorFunction; import org.opensearch.common.lucene.search.function.FunctionScoreQuery; -import org.opensearch.common.settings.Settings; -import org.opensearch.common.xcontent.LoggingDeprecationHandler; -import org.opensearch.common.xcontent.XContentType; -import org.opensearch.core.xcontent.NamedXContentRegistry; -import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.Operator; import org.opensearch.index.query.QueryBuilder; @@ -60,32 +48,9 @@ import org.opensearch.index.query.functionscore.FieldValueFactorFunctionBuilder; import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.opensearch.index.query.functionscore.ScoreFunctionBuilders; -import org.opensearch.search.SearchModule; @Slf4j public class SearchQueryBuilder { - private 
static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - static { - OBJECT_MAPPER.setSerializationInclusion(JsonInclude.Include.NON_NULL); - int maxSize = - Integer.parseInt( - System.getenv() - .getOrDefault( - Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH, - Constants.MAX_JACKSON_STRING_SIZE)); - OBJECT_MAPPER - .getFactory() - .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); - } - - private static final NamedXContentRegistry X_CONTENT_REGISTRY; - - static { - SearchModule searchModule = new SearchModule(Settings.EMPTY, Collections.emptyList()); - X_CONTENT_REGISTRY = new NamedXContentRegistry(searchModule.getNamedXContents()); - } - public static final String STRUCTURED_QUERY_PREFIX = "\\\\/q "; private final ExactMatchConfiguration exactMatchConfiguration; private final PartialConfiguration partialConfiguration; @@ -112,7 +77,7 @@ public QueryBuilder buildQuery( final QueryBuilder queryBuilder = buildInternalQuery(opContext, customQueryConfig, entitySpecs, query, fulltext); - return buildScoreFunctions(customQueryConfig, entitySpecs, queryBuilder); + return buildScoreFunctions(opContext, customQueryConfig, entitySpecs, queryBuilder); } /** @@ -133,7 +98,10 @@ private QueryBuilder buildInternalQuery( final String sanitizedQuery = query.replaceFirst("^:+", ""); final BoolQueryBuilder finalQuery = Optional.ofNullable(customQueryConfig) - .flatMap(cqc -> boolQueryBuilder(cqc, sanitizedQuery)) + .flatMap( + cqc -> + CustomizedQueryHandler.boolQueryBuilder( + opContext.getObjectMapper(), cqc, sanitizedQuery)) .orElse(QueryBuilders.boolQuery()) .minimumShouldMatch(1); @@ -326,14 +294,6 @@ private Set getStandardFields( return fields; } - private static String unquote(String query) { - return query.replaceAll("[\"']", ""); - } - - private static boolean isQuoted(String query) { - return Stream.of("\"", "'").anyMatch(query::contains); - } - private Optional getSimpleQuery( @Nonnull EntityRegistry entityRegistry, 
@Nullable QueryConfiguration customQueryConfig, @@ -410,13 +370,20 @@ private Optional getPrefixAndExactMatchQuery( getStandardFields(entityRegistry, entitySpecs) .forEach( searchFieldConfig -> { + boolean caseSensitivityEnabled = + exactMatchConfiguration.getCaseSensitivityFactor() > 0.0f; + float caseSensitivityFactor = + caseSensitivityEnabled + ? exactMatchConfiguration.getCaseSensitivityFactor() + : 1.0f; + if (searchFieldConfig.isDelimitedSubfield() && isPrefixQuery) { finalQuery.should( QueryBuilders.matchPhrasePrefixQuery(searchFieldConfig.fieldName(), query) .boost( searchFieldConfig.boost() * exactMatchConfiguration.getPrefixFactor() - * exactMatchConfiguration.getCaseSensitivityFactor()) + * caseSensitivityFactor) .queryName(searchFieldConfig.shortName())); // less than exact } @@ -425,13 +392,16 @@ private Optional getPrefixAndExactMatchQuery( // The non-.keyword field removes case information // Exact match case-sensitive - finalQuery.should( - QueryBuilders.termQuery( - ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), - unquotedQuery) - .caseInsensitive(false) - .boost(searchFieldConfig.boost() * exactMatchConfiguration.getExactFactor()) - .queryName(searchFieldConfig.shortName())); + if (caseSensitivityEnabled) { + finalQuery.should( + QueryBuilders.termQuery( + ESUtils.toKeywordField(searchFieldConfig.fieldName(), false), + unquotedQuery) + .caseInsensitive(false) + .boost( + searchFieldConfig.boost() * exactMatchConfiguration.getExactFactor()) + .queryName(searchFieldConfig.shortName())); + } // Exact match case-insensitive finalQuery.should( @@ -442,7 +412,7 @@ private Optional getPrefixAndExactMatchQuery( .boost( searchFieldConfig.boost() * exactMatchConfiguration.getExactFactor() - * exactMatchConfiguration.getCaseSensitivityFactor()) + * caseSensitivityFactor) .queryName(searchFieldConfig.fieldName())); } @@ -485,14 +455,16 @@ private Optional getStructuredQuery( return result; } - private FunctionScoreQueryBuilder 
buildScoreFunctions( + static FunctionScoreQueryBuilder buildScoreFunctions( + @Nonnull OperationContext opContext, @Nullable QueryConfiguration customQueryConfig, @Nonnull List entitySpecs, @Nonnull QueryBuilder queryBuilder) { if (customQueryConfig != null) { // Prefer configuration function scoring over annotation scoring - return functionScoreQueryBuilder(customQueryConfig, queryBuilder); + return CustomizedQueryHandler.functionScoreQueryBuilder( + opContext.getObjectMapper(), customQueryConfig, queryBuilder); } else { return QueryBuilders.functionScoreQuery( queryBuilder, buildAnnotationScoreFunctions(entitySpecs)) @@ -586,62 +558,6 @@ private static FieldValueFactorFunction.Modifier mapModifier( } } - public FunctionScoreQueryBuilder functionScoreQueryBuilder( - QueryConfiguration customQueryConfiguration, QueryBuilder queryBuilder) { - return toFunctionScoreQueryBuilder(queryBuilder, customQueryConfiguration.getFunctionScore()); - } - - public Optional boolQueryBuilder( - QueryConfiguration customQueryConfiguration, String query) { - if (customQueryConfiguration.getBoolQuery() != null) { - log.debug( - "Using custom query configuration queryRegex: {}", - customQueryConfiguration.getQueryRegex()); - } - return Optional.ofNullable(customQueryConfiguration.getBoolQuery()) - .map(bq -> toBoolQueryBuilder(query, bq)); - } - - private BoolQueryBuilder toBoolQueryBuilder(String query, BoolQueryConfiguration boolQuery) { - try { - String jsonFragment = - OBJECT_MAPPER - .writeValueAsString(boolQuery) - .replace("\"{{query_string}}\"", OBJECT_MAPPER.writeValueAsString(query)) - .replace( - "\"{{unquoted_query_string}}\"", - OBJECT_MAPPER.writeValueAsString(unquote(query))); - XContentParser parser = - XContentType.JSON - .xContent() - .createParser(X_CONTENT_REGISTRY, LoggingDeprecationHandler.INSTANCE, jsonFragment); - return BoolQueryBuilder.fromXContent(parser); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private 
FunctionScoreQueryBuilder toFunctionScoreQueryBuilder( - QueryBuilder queryBuilder, Map params) { - try { - HashMap body = new HashMap<>(params); - if (!body.isEmpty()) { - log.debug("Using custom scoring functions: {}", body); - } - - body.put("query", OBJECT_MAPPER.readValue(queryBuilder.toString(), Map.class)); - - String jsonFragment = OBJECT_MAPPER.writeValueAsString(Map.of("function_score", body)); - XContentParser parser = - XContentType.JSON - .xContent() - .createParser(X_CONTENT_REGISTRY, LoggingDeprecationHandler.INSTANCE, jsonFragment); - return (FunctionScoreQueryBuilder) FunctionScoreQueryBuilder.parseInnerQueryBuilder(parser); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - public float getWordGramFactor(String fieldName) { if (fieldName.endsWith("Grams2")) { return wordGramConfiguration.getTwoGramFactor(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java index 32fbff73a1d20..b1a74b9c09d35 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/BusinessAttributeUpdateHookService.java @@ -25,7 +25,7 @@ import java.util.Arrays; import java.util.Map; import java.util.Set; -import java.util.function.Consumer; +import java.util.function.BiConsumer; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; @@ -75,16 +75,22 @@ public void handleChangeEvent( Urn urn = entityChangeEvent.getEntityUrn(); log.info("Business Attribute update hook invoked for urn :" + urn); - - fetchRelatedEntities(opContext, urn, batch -> processBatch(opContext, batch), null, 0); + fetchRelatedEntities( + opContext, + urn, + (batch, batchNumber) -> processBatch(opContext, batch, batchNumber), + null, + 0, + 1); } 
private void fetchRelatedEntities( @NonNull final OperationContext opContext, @NonNull final Urn urn, - @NonNull final Consumer resultConsumer, + @NonNull final BiConsumer resultConsumer, @Nullable String scrollId, - int consumedEntityCount) { + int consumedEntityCount, + int batchNumber) { GraphRetriever graph = opContext.getRetrieverContext().get().getGraphRetriever(); RelatedEntitiesScrollResult result = @@ -100,22 +106,21 @@ private void fetchRelatedEntities( getRelatedEntitiesBatchSize, null, null); - resultConsumer.accept(result); - + resultConsumer.accept(result, batchNumber); + consumedEntityCount = consumedEntityCount + result.getEntities().size(); if (result.getScrollId() != null && consumedEntityCount < relatedEntitiesCount) { + batchNumber = batchNumber + 1; fetchRelatedEntities( - opContext, - urn, - resultConsumer, - result.getScrollId(), - consumedEntityCount + result.getEntities().size()); + opContext, urn, resultConsumer, result.getScrollId(), consumedEntityCount, batchNumber); } } private void processBatch( - @NonNull OperationContext opContext, @NonNull RelatedEntitiesScrollResult batch) { + @NonNull OperationContext opContext, + @NonNull RelatedEntitiesScrollResult batch, + int batchNumber) { AspectRetriever aspectRetriever = opContext.getRetrieverContext().get().getAspectRetriever(); - + log.info("BA Update Batch {} started", batchNumber); Set entityUrns = batch.getEntities().stream() .map(RelatedEntity::getUrn) @@ -147,5 +152,6 @@ private void processBatch( null, null)); }); + log.info("BA Update Batch {} completed", batchNumber); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EditableSchemaMetadataChangeEventGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EditableSchemaMetadataChangeEventGenerator.java index 1f094bb6ca989..4850fde426f00 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EditableSchemaMetadataChangeEventGenerator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/EditableSchemaMetadataChangeEventGenerator.java @@ -5,6 +5,7 @@ import com.datahub.util.RecordUtils; import com.github.fge.jsonpatch.JsonPatch; +import com.google.common.collect.ImmutableMap; import com.linkedin.common.AuditStamp; import com.linkedin.common.GlobalTags; import com.linkedin.common.GlossaryTerms; @@ -166,6 +167,7 @@ private static ChangeEvent getDocumentationChangeEvent( targetFieldInfo.getFieldPath(), datasetFieldUrn, targetFieldDescription)) + .parameters(ImmutableMap.of("description", targetFieldDescription)) .auditStamp(auditStamp) .build(); } @@ -183,6 +185,7 @@ private static ChangeEvent getDocumentationChangeEvent( Optional.ofNullable(targetFieldInfo).map(EditableSchemaFieldInfo::getFieldPath), datasetFieldUrn, baseFieldDescription)) + .parameters(ImmutableMap.of("description", baseFieldDescription)) .auditStamp(auditStamp) .build(); } @@ -203,6 +206,7 @@ private static ChangeEvent getDocumentationChangeEvent( datasetFieldUrn, baseFieldDescription, targetFieldDescription)) + .parameters(ImmutableMap.of("description", targetFieldDescription)) .auditStamp(auditStamp) .build(); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java index 1fd5d6e2c0f7a..483ab806c8462 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeline/eventgenerator/SchemaMetadataChangeEventGenerator.java @@ -4,6 +4,7 @@ import com.datahub.util.RecordUtils; import com.github.fge.jsonpatch.JsonPatch; +import com.google.common.collect.ImmutableMap; import 
com.linkedin.common.AuditStamp; import com.linkedin.common.urn.DatasetUrn; import com.linkedin.common.urn.Urn; @@ -62,6 +63,7 @@ private static ChangeEvent getDescriptionChange( .description( String.format( FIELD_DESCRIPTION_ADDED_FORMAT, targetDescription, targetField.getFieldPath())) + .parameters(ImmutableMap.of("description", targetDescription)) .auditStamp(auditStamp) .build(); } @@ -75,6 +77,7 @@ private static ChangeEvent getDescriptionChange( .description( String.format( FIELD_DESCRIPTION_REMOVED_FORMAT, baseDescription, baseField.getFieldPath())) + .parameters(ImmutableMap.of("description", baseDescription)) .auditStamp(auditStamp) .build(); } @@ -91,6 +94,7 @@ private static ChangeEvent getDescriptionChange( baseField.getFieldPath(), baseDescription, targetDescription)) + .parameters(ImmutableMap.of("description", targetDescription)) .auditStamp(auditStamp) .build(); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java b/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java index 3a10875d1a60a..2ca966b104e03 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/client/JavaEntityClientTest.java @@ -70,7 +70,8 @@ private JavaEntityClient getJavaEntityClient() { _lineageSearchService, _timeseriesAspectService, rollbackService, - _eventProducer); + _eventProducer, + 1); } @Test diff --git a/metadata-io/src/test/java/com/linkedin/metadata/connection/ConnectionServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/connection/ConnectionServiceTest.java new file mode 100644 index 0000000000000..658c66807ccf1 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/connection/ConnectionServiceTest.java @@ -0,0 +1,147 @@ +package com.linkedin.metadata.connection; + +import static org.mockito.Mockito.*; +import static org.testng.Assert.*; + +import 
com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.connection.DataHubConnectionDetails; +import com.linkedin.connection.DataHubConnectionDetailsType; +import com.linkedin.connection.DataHubJsonConnection; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.AspectUtils; +import com.linkedin.metadata.key.DataHubConnectionKey; +import com.linkedin.metadata.utils.EntityKeyUtils; +import io.datahubproject.metadata.context.OperationContext; +import org.mockito.Mockito; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class ConnectionServiceTest { + + private EntityClient entityClient; + private Authentication systemAuthentication; + private ConnectionService connectionService; + + @BeforeMethod + public void setUp() { + entityClient = Mockito.mock(EntityClient.class); + systemAuthentication = Mockito.mock(Authentication.class); + connectionService = new ConnectionService(entityClient); + } + + @Test + public void testUpsertConnection() throws Exception { + final String id = "testId"; + final Urn platformUrn = UrnUtils.getUrn("urn:li:dataPlatform:slack"); + final DataHubConnectionDetailsType type = DataHubConnectionDetailsType.JSON; + final DataHubJsonConnection json = new DataHubJsonConnection().setEncryptedBlob("blob"); + final Authentication authentication = Mockito.mock(Authentication.class); + final DataHubConnectionKey key = new DataHubConnectionKey().setId(id); + final Urn connectionUrn = + 
EntityKeyUtils.convertEntityKeyToUrn(key, Constants.DATAHUB_CONNECTION_ENTITY_NAME); + + // Execute and assert + Urn result = + connectionService.upsertConnection( + mock(OperationContext.class), id, platformUrn, type, json, null); + + DataHubConnectionDetails expectedDetails = mockConnectionDetails(id); + DataPlatformInstance expectedDataPlatformInstance = mockPlatformInstance(platformUrn); + + verify(entityClient) + .batchIngestProposals( + any(OperationContext.class), + Mockito.eq( + ImmutableList.of( + AspectUtils.buildMetadataChangeProposal( + connectionUrn, + Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME, + expectedDetails), + AspectUtils.buildMetadataChangeProposal( + connectionUrn, + Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME, + expectedDataPlatformInstance))), + Mockito.eq(false)); + assertEquals(result, connectionUrn); + } + + @Test + public void testGetConnectionDetails() throws Exception { + final Urn connectionUrn = Mockito.mock(Urn.class); + + final DataHubConnectionDetails connectionDetails = mockConnectionDetails("testId"); + final DataPlatformInstance platformInstance = + mockPlatformInstance(UrnUtils.getUrn("urn:li:dataPlatform:slack")); + + EntityResponse response = + new EntityResponse() + .setEntityName(Constants.DATAHUB_CONNECTION_ENTITY_NAME) + .setUrn(connectionUrn) + .setAspects( + new EnvelopedAspectMap( + ImmutableMap.of( + Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME, + new EnvelopedAspect() + .setName(Constants.DATAHUB_CONNECTION_ENTITY_NAME) + .setValue(new Aspect(connectionDetails.data())), + Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME, + new EnvelopedAspect() + .setName(Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME) + .setValue(new Aspect(platformInstance.data()))))); + when(entityClient.getV2( + any(OperationContext.class), + Mockito.eq(Constants.DATAHUB_CONNECTION_ENTITY_NAME), + Mockito.eq(connectionUrn), + Mockito.eq( + ImmutableSet.of( + Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME, + 
Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME)))) + .thenReturn(response); + + // Execute and assert + DataHubConnectionDetails details = + connectionService.getConnectionDetails(mock(OperationContext.class), connectionUrn); + assertEquals(details, connectionDetails); + } + + @Test + public void testGetConnectionEntityResponse() throws Exception { + final Urn connectionUrn = Mockito.mock(Urn.class); + EntityResponse response = Mockito.mock(EntityResponse.class); + when(entityClient.getV2( + any(OperationContext.class), + Mockito.eq(Constants.DATAHUB_CONNECTION_ENTITY_NAME), + Mockito.eq(connectionUrn), + Mockito.eq( + ImmutableSet.of( + Constants.DATAHUB_CONNECTION_DETAILS_ASPECT_NAME, + Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME)))) + .thenReturn(response); + // Execute and assert + assertEquals( + connectionService.getConnectionEntityResponse(mock(OperationContext.class), connectionUrn), + response); + } + + private DataHubConnectionDetails mockConnectionDetails(String id) { + return new DataHubConnectionDetails() + .setType(DataHubConnectionDetailsType.JSON) + .setName(id) + .setJson(new DataHubJsonConnection().setEncryptedBlob("blob")); + } + + private DataPlatformInstance mockPlatformInstance(Urn platformUrn) { + return new DataPlatformInstance().setPlatform(platformUrn); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java index 683120929c8ec..5ab7e686cb671 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java @@ -53,9 +53,12 @@ public void testStreamAspects() throws AssertionError { List ingestedUrns = ingestedAspects.keySet().stream().map(Urn::toString).collect(Collectors.toList()); - Stream aspectStream = - _migrationsDao.streamAspects(CORP_USER_ENTITY_NAME, 
CORP_USER_KEY_ASPECT_NAME); - List aspectList = aspectStream.collect(Collectors.toList()); + List aspectList; + try (Stream stream = + _migrationsDao.streamAspects(CORP_USER_ENTITY_NAME, CORP_USER_KEY_ASPECT_NAME)) { + aspectList = stream.collect(Collectors.toList()); + } + assertEquals(ingestedUrns.size(), aspectList.size()); Set urnsFetched = aspectList.stream().map(EntityAspect::getUrn).collect(Collectors.toSet()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 9875e7efab63a..45d4fe4f46c99 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -45,8 +45,8 @@ import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.entity.validation.ValidationException; -import com.linkedin.metadata.entity.validation.ValidationUtils; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.key.CorpUserKey; import com.linkedin.metadata.models.AspectSpec; @@ -58,6 +58,7 @@ import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.metadata.snapshot.CorpUserSnapshot; import com.linkedin.metadata.snapshot.Snapshot; +import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.GenericAspect; import com.linkedin.mxe.MetadataChangeLog; @@ -84,6 +85,7 @@ import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import java.util.stream.Stream; import org.junit.Assert; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; @@ -533,6 +535,11 @@ public void 
testReingestAspectsGetLatestAspects() throws Exception { initialChangeLog.setAspect(aspect); initialChangeLog.setSystemMetadata(metadata1); + initialChangeLog.setEntityKeyAspect( + GenericRecordUtils.serializeAspect( + EntityKeyUtils.convertUrnToEntityKey( + entityUrn, + _testEntityRegistry.getEntitySpec(entityUrn.getEntityType()).getKeyAspectSpec()))); final MetadataChangeLog restateChangeLog = new MetadataChangeLog(); restateChangeLog.setEntityType(entityUrn.getEntityType()); @@ -595,6 +602,11 @@ public void testReingestLineageAspect() throws Exception { initialChangeLog.setAspect(aspect); initialChangeLog.setSystemMetadata(metadata1); + initialChangeLog.setEntityKeyAspect( + GenericRecordUtils.serializeAspect( + EntityKeyUtils.convertUrnToEntityKey( + entityUrn, + _testEntityRegistry.getEntitySpec(entityUrn.getEntityType()).getKeyAspectSpec()))); final MetadataChangeLog restateChangeLog = new MetadataChangeLog(); restateChangeLog.setEntityType(entityUrn.getEntityType()); @@ -606,6 +618,11 @@ public void testReingestLineageAspect() throws Exception { restateChangeLog.setSystemMetadata(metadata1); restateChangeLog.setPreviousAspectValue(aspect); restateChangeLog.setPreviousSystemMetadata(simulatePullFromDB(metadata1, SystemMetadata.class)); + restateChangeLog.setEntityKeyAspect( + GenericRecordUtils.serializeAspect( + EntityKeyUtils.convertUrnToEntityKey( + entityUrn, + _testEntityRegistry.getEntitySpec(entityUrn.getEntityType()).getKeyAspectSpec()))); Map latestAspects = _entityServiceImpl.getLatestAspectsForUrn( @@ -988,7 +1005,7 @@ public void testRollbackKey() throws AssertionError { CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); RecordTemplate writeKey1 = - EntityUtils.buildKeyAspect(opContext.getEntityRegistry(), entityUrn1); + EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), entityUrn1); // Ingest CorpUserInfo Aspect #1 Overwrite CorpUserInfo writeAspect1Overwrite = @@ -1075,7 +1092,7 @@ public void 
testRollbackUrn() throws AssertionError { CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); RecordTemplate writeKey1 = - EntityUtils.buildKeyAspect(opContext.getEntityRegistry(), entityUrn1); + EntityApiUtils.buildKeyAspect(opContext.getEntityRegistry(), entityUrn1); // Ingest CorpUserInfo Aspect #2 CorpUserInfo writeAspect2 = AspectGenerationUtils.createCorpUserInfo("email2@test.com"); @@ -1246,10 +1263,10 @@ public void testIngestGetLatestAspect() throws AssertionError { assertTrue(DataTemplateUtil.areEqual(writeAspect2, readAspect2)); assertTrue( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); + EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); assertTrue( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1)); + EntityApiUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1)); verify(_mockProducer, times(1)) .produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), mclCaptor.capture()); @@ -1328,10 +1345,10 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { DataTemplateUtil.areEqual(writeAspect2, new CorpUserInfo(readAspect2.getValue().data()))); assertTrue( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); + EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); assertTrue( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1)); + EntityApiUtils.parseSystemMetadata(readAspectDao1.getSystemMetadata()), metadata1)); verify(_mockProducer, times(2)) .produceMetadataChangeLog( @@ -1452,14 +1469,14 @@ public void testIngestSameAspect() throws AssertionError { assertTrue(DataTemplateUtil.areEqual(writeAspect2, readAspect2)); assertFalse( DataTemplateUtil.areEqual( - 
EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); + EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata2)); assertFalse( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata1)); + EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata1)); assertTrue( DataTemplateUtil.areEqual( - EntityUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata3)); + EntityApiUtils.parseSystemMetadata(readAspectDao2.getSystemMetadata()), metadata3)); verify(_mockProducer, times(0)) .produceMetadataChangeLog(Mockito.any(), Mockito.any(), Mockito.any()); @@ -1706,9 +1723,7 @@ public void testRestoreIndices() throws Exception { args.batchDelayMs(1L); args.numThreads(1); args.urn(urnStr); - _entityServiceImpl - .streamRestoreIndices(opContext, args, obj -> {}) - .collect(Collectors.toList()); + _entityServiceImpl.restoreIndices(opContext, args, obj -> {}); ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); @@ -1727,12 +1742,12 @@ public void testRestoreIndices() throws Exception { public void testValidateUrn() throws Exception { // Valid URN Urn validTestUrn = new Urn("li", "corpuser", new TupleKey("testKey")); - ValidationUtils.validateUrn(opContext.getEntityRegistry(), validTestUrn); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), validTestUrn); // URN with trailing whitespace Urn testUrnWithTrailingWhitespace = new Urn("li", "corpuser", new TupleKey("testKey ")); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnWithTrailingWhitespace); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnWithTrailingWhitespace); Assert.fail("Should have raised IllegalArgumentException for URN with trailing whitespace"); } catch (IllegalArgumentException e) { assertEquals( @@ -1744,7 +1759,7 @@ public void testValidateUrn() throws Exception { Urn testUrnTooLong = new 
Urn("li", "corpuser", new TupleKey(stringTooLong)); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnTooLong); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnTooLong); Assert.fail("Should have raised IllegalArgumentException for URN too long"); } catch (IllegalArgumentException e) { assertEquals( @@ -1763,9 +1778,9 @@ public void testValidateUrn() throws Exception { Urn testUrnSameLengthWhenEncoded = new Urn("li", "corpUser", new TupleKey(buildStringSameLengthWhenEncoded.toString())); // Same length when encoded should be allowed, the encoded one should not be - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnSameLengthWhenEncoded); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnSameLengthWhenEncoded); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnTooLongWhenEncoded); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnTooLongWhenEncoded); Assert.fail("Should have raised IllegalArgumentException for URN too long"); } catch (IllegalArgumentException e) { assertEquals( @@ -1775,9 +1790,9 @@ public void testValidateUrn() throws Exception { // Urn containing disallowed character Urn testUrnSpecialCharValid = new Urn("li", "corpUser", new TupleKey("bob␇")); Urn testUrnSpecialCharInvalid = new Urn("li", "corpUser", new TupleKey("bob␟")); - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnSpecialCharValid); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnSpecialCharValid); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), testUrnSpecialCharInvalid); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), testUrnSpecialCharInvalid); Assert.fail( "Should have raised IllegalArgumentException for URN containing the illegal char"); } catch (IllegalArgumentException e) { @@ -1786,7 +1801,7 @@ public void testValidateUrn() throws Exception { Urn urnWithMismatchedParens = new 
Urn("li", "corpuser", new TupleKey("test(Key")); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), urnWithMismatchedParens); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), urnWithMismatchedParens); Assert.fail("Should have raised IllegalArgumentException for URN with mismatched parens"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains("mismatched paren nesting")); @@ -1794,7 +1809,7 @@ public void testValidateUrn() throws Exception { Urn invalidType = new Urn("li", "fakeMadeUpType", new TupleKey("testKey")); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), invalidType); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), invalidType); Assert.fail( "Should have raised IllegalArgumentException for URN with non-existent entity type"); } catch (IllegalArgumentException e) { @@ -1803,12 +1818,12 @@ public void testValidateUrn() throws Exception { Urn validFabricType = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "PROD")); - ValidationUtils.validateUrn(opContext.getEntityRegistry(), validFabricType); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), validFabricType); Urn invalidFabricType = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "prod")); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), invalidFabricType); + ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), invalidFabricType); Assert.fail("Should have raised IllegalArgumentException for URN with invalid fabric type"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains(invalidFabricType.toString())); @@ -1817,7 +1832,7 @@ public void testValidateUrn() throws Exception { Urn urnEndingInComma = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "PROD", "")); try { - ValidationUtils.validateUrn(opContext.getEntityRegistry(), urnEndingInComma); + 
ValidationApiUtils.validateUrn(opContext.getEntityRegistry(), urnEndingInComma); Assert.fail("Should have raised IllegalArgumentException for URN ending in comma"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains(urnEndingInComma.toString())); @@ -1907,16 +1922,22 @@ public void testStructuredPropertyIngestProposal() throws Exception { assertEquals( _entityServiceImpl.getAspect(opContext, firstPropertyUrn, definitionAspectName, 0), structuredPropertyDefinition); - Set defs = - _aspectDao - .streamAspects( - STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .map( - entityAspect -> - EntityUtils.toSystemAspect(opContext.getRetrieverContext().get(), entityAspect) - .get() - .getAspect(StructuredPropertyDefinition.class)) - .collect(Collectors.toSet()); + + Set defs; + try (Stream stream = + _aspectDao.streamAspects( + STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { + defs = + stream + .map( + entityAspect -> + EntityUtils.toSystemAspect( + opContext.getRetrieverContext().get(), entityAspect) + .get() + .getAspect(StructuredPropertyDefinition.class)) + .collect(Collectors.toSet()); + } + assertEquals(defs.size(), 1); assertEquals(defs, Set.of(structuredPropertyDefinition)); @@ -1983,16 +2004,20 @@ public void testStructuredPropertyIngestProposal() throws Exception { assertEquals( _entityServiceImpl.getAspect(opContext, secondPropertyUrn, definitionAspectName, 0), secondDefinition); - defs = - _aspectDao - .streamAspects( - STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME) - .map( - entityAspect -> - EntityUtils.toSystemAspect(opContext.getRetrieverContext().get(), entityAspect) - .get() - .getAspect(StructuredPropertyDefinition.class)) - .collect(Collectors.toSet()); + try (Stream stream = + _aspectDao.streamAspects( + STRUCTURED_PROPERTY_ENTITY_NAME, STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME)) { + defs = + stream + .map( + entityAspect -> + 
EntityUtils.toSystemAspect( + opContext.getRetrieverContext().get(), entityAspect) + .get() + .getAspect(StructuredPropertyDefinition.class)) + .collect(Collectors.toSet()); + } + assertEquals(defs.size(), 2); assertEquals(defs, Set.of(secondDefinition, structuredPropertyDefinition)); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/ValidationUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/ValidationUtilsTest.java index 17eae455aa4c4..f89d599ccc12a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/ValidationUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/ValidationUtilsTest.java @@ -12,8 +12,8 @@ import com.linkedin.data.DataMap; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.metadata.entity.validation.ValidationApiUtils; import com.linkedin.metadata.entity.validation.ValidationException; -import com.linkedin.metadata.entity.validation.ValidationUtils; import com.linkedin.metadata.key.DatasetKey; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -43,14 +43,14 @@ public void testValidateOrThrowThrowsOnMissingUnrecognizedField() { rawMap.put("removed", true); rawMap.put("extraField", 1); Status status = new Status(rawMap); - assertThrows(ValidationException.class, () -> ValidationUtils.validateOrThrow(status)); + assertThrows(ValidationException.class, () -> ValidationApiUtils.validateOrThrow(status)); } @Test public void testValidateOrThrowThrowsOnMissingRequiredField() { DataMap rawMap = new DataMap(); BrowsePath status = new BrowsePath(rawMap); - assertThrows(ValidationException.class, () -> ValidationUtils.validateOrThrow(status)); + assertThrows(ValidationException.class, () -> ValidationApiUtils.validateOrThrow(status)); } @Test @@ -59,14 +59,14 @@ public void 
testValidateOrThrowDoesNotThrowOnMissingOptionalField() throws Excep Owner owner = new Owner(rawMap); owner.setOwner(Urn.createFromString("urn:li:corpuser:test")); owner.setType(OwnershipType.DATAOWNER); - ValidationUtils.validateOrThrow(owner); + ValidationApiUtils.validateOrThrow(owner); } @Test public void testValidateOrThrowDoesNotThrowOnMissingDefaultField() { DataMap rawMap = new DataMap(); Status status = new Status(rawMap); - ValidationUtils.validateOrThrow(status); + ValidationApiUtils.validateOrThrow(status); } @Test @@ -75,7 +75,7 @@ public void testConvertEntityUrnToKeyUrlEncoded() throws URISyntaxException { Urn.createFromString( "urn:li:dataset:(urn:li:dataPlatform:s3,urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts/prog_maintenance%2CPROD%29,PROD)"); - ValidationUtils.validateUrn(entityRegistry, urn); + ValidationApiUtils.validateUrn(entityRegistry, urn); final AspectSpec keyAspectSpec = entityRegistry.getEntitySpec(urn.getEntityType()).getKeyAspectSpec(); @@ -94,6 +94,6 @@ public void testConvertEntityUrnToKeyUrlEncoded() throws URISyntaxException { "urn:li:dataset:%28urn:li:dataPlatform:s3%2Ctest-datalake-concepts/prog_maintenance%2CPROD%29"); assertThrows( IllegalArgumentException.class, - () -> ValidationUtils.validateUrn(entityRegistry, invalidUrn)); + () -> ValidationApiUtils.validateUrn(entityRegistry, invalidUrn)); } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java index 326f814c13dd2..2f68f17dae241 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/AutocompleteRequestHandlerTest.java @@ -5,6 +5,11 @@ import static org.testng.Assert.assertTrue; import com.linkedin.metadata.TestEntitySpecBuilder; +import 
com.linkedin.metadata.aspect.AspectRetriever; +import com.linkedin.metadata.config.search.custom.AutocompleteConfiguration; +import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.config.search.custom.QueryConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.query.request.AutocompleteRequestHandler; import io.datahubproject.metadata.context.OperationContext; @@ -12,10 +17,16 @@ import java.util.List; import java.util.Map; import org.opensearch.action.search.SearchRequest; +import org.opensearch.common.lucene.search.function.FieldValueFactorFunction; import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; import org.opensearch.index.query.MultiMatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.opensearch.index.query.functionscore.ScoreFunctionBuilders; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import org.testng.annotations.Test; @@ -23,10 +34,50 @@ public class AutocompleteRequestHandlerTest { private AutocompleteRequestHandler handler = AutocompleteRequestHandler.getBuilder( - TestEntitySpecBuilder.getSpec(), TestOperationContexts.emptyAspectRetriever(null)); + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder().build(), + TestOperationContexts.emptyAspectRetriever(null)); private OperationContext mockOpContext = TestOperationContexts.systemContextNoSearchAuthorization(mock(EntityRegistry.class)); + private static final QueryConfiguration 
TEST_QUERY_CONFIG = + QueryConfiguration.builder() + .queryRegex(".*") + .simpleQuery(true) + .exactMatchQuery(true) + .prefixMatchQuery(true) + .boolQuery( + BoolQueryConfiguration.builder() + .must(List.of(Map.of("term", Map.of("name", "{{query_string}}")))) + .build()) + .functionScore( + Map.of( + "score_mode", + "avg", + "boost_mode", + "multiply", + "functions", + List.of( + Map.of( + "weight", + 1, + "filter", + Map.of("match_all", Map.of())), + Map.of( + "weight", + 0.5, + "filter", + Map.of( + "term", Map.of("materialized", Map.of("value", true)))), + Map.of( + "weight", + 1.5, + "filter", + Map.of( + "term", + Map.of("deprecated", Map.of("value", false))))))) + .build(); + @Test public void testDefaultAutocompleteRequest() { // When field is null @@ -34,7 +85,9 @@ public void testDefaultAutocompleteRequest() { handler.getSearchRequest(mockOpContext, "input", null, null, 10); SearchSourceBuilder sourceBuilder = autocompleteRequest.source(); assertEquals(sourceBuilder.size(), 10); - BoolQueryBuilder query = (BoolQueryBuilder) sourceBuilder.query(); + BoolQueryBuilder wrapper = + (BoolQueryBuilder) ((FunctionScoreQueryBuilder) sourceBuilder.query()).query(); + BoolQueryBuilder query = (BoolQueryBuilder) extractNestedQuery(wrapper); assertEquals(query.should().size(), 3); MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(2); @@ -49,8 +102,8 @@ public void testDefaultAutocompleteRequest() { (MatchPhrasePrefixQueryBuilder) query.should().get(0); assertEquals("keyPart1.delimited", prefixQuery.fieldName()); - assertEquals(query.mustNot().size(), 1); - MatchQueryBuilder removedFilter = (MatchQueryBuilder) query.mustNot().get(0); + assertEquals(wrapper.mustNot().size(), 1); + MatchQueryBuilder removedFilter = (MatchQueryBuilder) wrapper.mustNot().get(0); assertEquals(removedFilter.fieldName(), "removed"); assertEquals(removedFilter.value(), true); HighlightBuilder highlightBuilder = sourceBuilder.highlighter(); @@ -73,7 +126,10 
@@ public void testAutocompleteRequestWithField() { handler.getSearchRequest(mockOpContext, "input", "field", null, 10); SearchSourceBuilder sourceBuilder = autocompleteRequest.source(); assertEquals(sourceBuilder.size(), 10); - BoolQueryBuilder query = (BoolQueryBuilder) sourceBuilder.query(); + BoolQueryBuilder wrapper = + (BoolQueryBuilder) ((FunctionScoreQueryBuilder) sourceBuilder.query()).query(); + assertEquals(wrapper.should().size(), 1); + BoolQueryBuilder query = (BoolQueryBuilder) extractNestedQuery(wrapper); assertEquals(query.should().size(), 2); MultiMatchQueryBuilder autocompleteQuery = (MultiMatchQueryBuilder) query.should().get(1); @@ -88,7 +144,7 @@ public void testAutocompleteRequestWithField() { (MatchPhrasePrefixQueryBuilder) query.should().get(0); assertEquals("field.delimited", prefixQuery.fieldName()); - MatchQueryBuilder removedFilter = (MatchQueryBuilder) query.mustNot().get(0); + MatchQueryBuilder removedFilter = (MatchQueryBuilder) wrapper.mustNot().get(0); assertEquals(removedFilter.fieldName(), "removed"); assertEquals(removedFilter.value(), true); HighlightBuilder highlightBuilder = sourceBuilder.highlighter(); @@ -99,4 +155,272 @@ public void testAutocompleteRequestWithField() { assertEquals(highlightedFields.get(2).name(), "field.ngram"); assertEquals(highlightedFields.get(3).name(), "field.delimited"); } + + @Test + public void testCustomConfigWithDefault() { + // Exclude Default query + AutocompleteRequestHandler withoutDefaultQuery = + AutocompleteRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder() + .autocompleteConfigurations( + List.of( + AutocompleteConfiguration.builder() + .queryRegex(".*") + .defaultQuery(false) + .boolQuery( + BoolQueryConfiguration.builder() + .should(List.of(Map.of("match_all", Map.of()))) + .build()) + .build())) + .build(), + mock(AspectRetriever.class)); + + SearchRequest autocompleteRequest = + withoutDefaultQuery.getSearchRequest(mockOpContext, 
"input", null, null, 10); + SearchSourceBuilder sourceBuilder = autocompleteRequest.source(); + FunctionScoreQueryBuilder wrapper = (FunctionScoreQueryBuilder) sourceBuilder.query(); + assertEquals(((BoolQueryBuilder) wrapper.query()).should().size(), 1); + QueryBuilder customQuery = extractNestedQuery((BoolQueryBuilder) wrapper.query()); + assertEquals(customQuery, QueryBuilders.matchAllQuery()); + + // Include Default query + AutocompleteRequestHandler withDefaultQuery = + AutocompleteRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder() + .autocompleteConfigurations( + List.of( + AutocompleteConfiguration.builder() + .queryRegex(".*") + .defaultQuery(true) + .boolQuery( + BoolQueryConfiguration.builder() + .should(List.of(Map.of("match_all", Map.of()))) + .build()) + .build())) + .build(), + mock(AspectRetriever.class)); + + autocompleteRequest = withDefaultQuery.getSearchRequest(mockOpContext, "input", null, null, 10); + sourceBuilder = autocompleteRequest.source(); + wrapper = (FunctionScoreQueryBuilder) sourceBuilder.query(); + BoolQueryBuilder query = + ((BoolQueryBuilder) ((BoolQueryBuilder) wrapper.query()).should().get(0)); + assertEquals(query.should().size(), 2); + + List shouldQueries = query.should(); + + // Default + BoolQueryBuilder defaultQuery = + (BoolQueryBuilder) + shouldQueries.stream().filter(qb -> qb instanceof BoolQueryBuilder).findFirst().get(); + assertEquals(defaultQuery.should().size(), 3); + + // Custom + customQuery = + shouldQueries.stream().filter(qb -> qb instanceof MatchAllQueryBuilder).findFirst().get(); + assertEquals(customQuery, QueryBuilders.matchAllQuery()); + } + + @Test + public void testCustomConfigWithInheritedQueryFunctionScores() { + // Pickup scoring functions from non-autocomplete + AutocompleteRequestHandler withInherit = + AutocompleteRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder() + 
.queryConfigurations(List.of(TEST_QUERY_CONFIG)) + .autocompleteConfigurations( + List.of( + AutocompleteConfiguration.builder() + .queryRegex(".*") + .defaultQuery(false) + .inheritFunctionScore(true) + .boolQuery( + BoolQueryConfiguration.builder() + .should(List.of(Map.of("match_all", Map.of()))) + .build()) + .build())) + .build(), + mock(AspectRetriever.class)); + + SearchRequest autocompleteRequest = + withInherit.getSearchRequest(mockOpContext, "input", null, null, 10); + SearchSourceBuilder sourceBuilder = autocompleteRequest.source(); + FunctionScoreQueryBuilder wrapper = (FunctionScoreQueryBuilder) sourceBuilder.query(); + assertEquals(((BoolQueryBuilder) wrapper.query()).should().size(), 1); + + QueryBuilder customQuery = extractNestedQuery(((BoolQueryBuilder) wrapper.query())); + assertEquals(customQuery, QueryBuilders.matchAllQuery()); + + FunctionScoreQueryBuilder.FilterFunctionBuilder[] expectedQueryConfigurationScoreFunctions = { + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + ScoreFunctionBuilders.weightFactorFunction(1f)), + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + QueryBuilders.termQuery("materialized", true), + ScoreFunctionBuilders.weightFactorFunction(0.5f)), + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + QueryBuilders.termQuery("deprecated", false), + ScoreFunctionBuilders.weightFactorFunction(1.5f)) + }; + assertEquals(wrapper.filterFunctionBuilders(), expectedQueryConfigurationScoreFunctions); + + // no search query customization + AutocompleteRequestHandler noQueryCustomization = + AutocompleteRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder() + .autocompleteConfigurations( + List.of( + AutocompleteConfiguration.builder() + .queryRegex(".*") + .defaultQuery(false) + .boolQuery( + BoolQueryConfiguration.builder() + .should(List.of(Map.of("match_all", Map.of()))) + .build()) + .build())) + .build(), + mock(AspectRetriever.class)); + + autocompleteRequest = + 
noQueryCustomization.getSearchRequest(mockOpContext, "input", null, null, 10); + sourceBuilder = autocompleteRequest.source(); + wrapper = (FunctionScoreQueryBuilder) sourceBuilder.query(); + assertEquals(((BoolQueryBuilder) wrapper.query()).should().size(), 1); + + customQuery = extractNestedQuery((BoolQueryBuilder) wrapper.query()); + assertEquals(customQuery, QueryBuilders.matchAllQuery()); + + // PDL annotation based on default behavior of query builder + FunctionScoreQueryBuilder.FilterFunctionBuilder[] expectedDefaultScoreFunctions = { + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + ScoreFunctionBuilders.weightFactorFunction(1f)), + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + ScoreFunctionBuilders.fieldValueFactorFunction("feature2") + .modifier(FieldValueFactorFunction.Modifier.NONE) + .missing(0.0)), + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + ScoreFunctionBuilders.fieldValueFactorFunction("feature1") + .modifier(FieldValueFactorFunction.Modifier.LOG1P) + .missing(0.0)) + }; + assertEquals(wrapper.filterFunctionBuilders(), expectedDefaultScoreFunctions); + } + + @Test + public void testCustomConfigWithFunctionScores() { + // Scoring functions explicit autocomplete override + AutocompleteRequestHandler explicitNoInherit = + AutocompleteRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder() + .queryConfigurations(List.of(TEST_QUERY_CONFIG)) // should be ignored + .autocompleteConfigurations( + List.of( + AutocompleteConfiguration.builder() + .queryRegex(".*") + .defaultQuery(false) + .inheritFunctionScore(false) + .boolQuery( + BoolQueryConfiguration.builder() + .should(List.of(Map.of("match_all", Map.of()))) + .build()) + .functionScore( + Map.of( + "score_mode", + "avg", + "boost_mode", + "multiply", + "functions", + List.of( + Map.of( + "weight", + 1.5, + "filter", + Map.of( + "term", + Map.of( + "deprecated", Map.of("value", false))))))) + .build())) + .build(), + 
mock(AspectRetriever.class)); + + SearchRequest autocompleteRequest = + explicitNoInherit.getSearchRequest(mockOpContext, "input", null, null, 10); + SearchSourceBuilder sourceBuilder = autocompleteRequest.source(); + FunctionScoreQueryBuilder wrapper = (FunctionScoreQueryBuilder) sourceBuilder.query(); + assertEquals(((BoolQueryBuilder) wrapper.query()).should().size(), 1); + + QueryBuilder customQuery = extractNestedQuery((BoolQueryBuilder) wrapper.query()); + assertEquals(customQuery, QueryBuilders.matchAllQuery()); + + FunctionScoreQueryBuilder.FilterFunctionBuilder[] expectedCustomScoreFunctions = { + new FunctionScoreQueryBuilder.FilterFunctionBuilder( + QueryBuilders.termQuery("deprecated", false), + ScoreFunctionBuilders.weightFactorFunction(1.5f)) + }; + assertEquals(wrapper.filterFunctionBuilders(), expectedCustomScoreFunctions); + + // Pickup scoring functions explicit autocomplete override (even though default query and + // inherit enabled) + AutocompleteRequestHandler explicit = + AutocompleteRequestHandler.getBuilder( + TestEntitySpecBuilder.getSpec(), + CustomSearchConfiguration.builder() + .queryConfigurations(List.of(TEST_QUERY_CONFIG)) // should be ignored + .autocompleteConfigurations( + List.of( + AutocompleteConfiguration.builder() + .queryRegex(".*") + .defaultQuery(true) + .inheritFunctionScore(true) + .boolQuery( + BoolQueryConfiguration.builder() + .should(List.of(Map.of("match_all", Map.of()))) + .build()) + .functionScore( + Map.of( + "score_mode", + "avg", + "boost_mode", + "multiply", + "functions", + List.of( + Map.of( + "weight", + 1.5, + "filter", + Map.of( + "term", + Map.of( + "deprecated", Map.of("value", false))))))) + .build())) + .build(), + mock(AspectRetriever.class)); + + autocompleteRequest = explicit.getSearchRequest(mockOpContext, "input", null, null, 10); + sourceBuilder = autocompleteRequest.source(); + wrapper = (FunctionScoreQueryBuilder) sourceBuilder.query(); + BoolQueryBuilder query = + ((BoolQueryBuilder) 
((BoolQueryBuilder) wrapper.query()).should().get(0)); + assertEquals(query.should().size(), 2); + + customQuery = query.should().get(0); + assertEquals(customQuery, QueryBuilders.matchAllQuery()); + + // standard query still present + assertEquals(((BoolQueryBuilder) query.should().get(1)).should().size(), 3); + + // custom functions included + assertEquals(wrapper.filterFunctionBuilders(), expectedCustomScoreFunctions); + } + + private static QueryBuilder extractNestedQuery(BoolQueryBuilder nested) { + assertEquals(nested.should().size(), 1); + BoolQueryBuilder firstLevel = (BoolQueryBuilder) nested.should().get(0); + assertEquals(firstLevel.should().size(), 1); + return firstLevel.should().get(0); + } } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/CustomizedQueryHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/CustomizedQueryHandlerTest.java index 47d18fe0d299c..4e4c8acf300e4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/CustomizedQueryHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/CustomizedQueryHandlerTest.java @@ -178,8 +178,8 @@ public void functionScoreQueryBuilderTest() { * Test select star */ FunctionScoreQueryBuilder selectStarTest = - SEARCH_QUERY_BUILDER.functionScoreQueryBuilder( - test.lookupQueryConfig("*").get(), inputQuery); + CustomizedQueryHandler.functionScoreQueryBuilder( + new ObjectMapper(), test.lookupQueryConfig("*").get(), inputQuery); FunctionScoreQueryBuilder.FilterFunctionBuilder[] expectedSelectStarScoreFunctions = { new FunctionScoreQueryBuilder.FilterFunctionBuilder( @@ -202,8 +202,8 @@ public void functionScoreQueryBuilderTest() { * Test default (non-select start) */ FunctionScoreQueryBuilder defaultTest = - SEARCH_QUERY_BUILDER.functionScoreQueryBuilder( - test.lookupQueryConfig("foobar").get(), inputQuery); + CustomizedQueryHandler.functionScoreQueryBuilder( + new 
ObjectMapper(), test.lookupQueryConfig("foobar").get(), inputQuery); FunctionScoreQueryBuilder.FilterFunctionBuilder[] expectedDefaultScoreFunctions = { new FunctionScoreQueryBuilder.FilterFunctionBuilder( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java index 39dcd120f2842..dbb5bdb0b7d01 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchQueryBuilderTest.java @@ -29,6 +29,7 @@ import com.linkedin.metadata.search.elasticsearch.query.request.SearchQueryBuilder; import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; import io.datahubproject.test.search.config.SearchCommonTestConfiguration; import java.io.IOException; import java.util.List; @@ -91,15 +92,14 @@ public class SearchQueryBuilderTest extends AbstractTestNGSpringContextTests { public static final SearchQueryBuilder TEST_BUILDER = new SearchQueryBuilder(testQueryConfig, null); + public OperationContext opContext = TestOperationContexts.systemContextNoSearchAuthorization(); + @Test public void testQueryBuilderFulltext() { FunctionScoreQueryBuilder result = (FunctionScoreQueryBuilder) TEST_BUILDER.buildQuery( - mock(OperationContext.class), - ImmutableList.of(TestEntitySpecBuilder.getSpec()), - "testQuery", - true); + opContext, ImmutableList.of(TestEntitySpecBuilder.getSpec()), "testQuery", true); BoolQueryBuilder mainQuery = (BoolQueryBuilder) result.query(); List shouldQueries = mainQuery.should(); assertEquals(shouldQueries.size(), 2); @@ -200,10 +200,7 @@ public void testQueryBuilderStructured() { FunctionScoreQueryBuilder result = (FunctionScoreQueryBuilder) TEST_BUILDER.buildQuery( - 
mock(OperationContext.class), - ImmutableList.of(TestEntitySpecBuilder.getSpec()), - "testQuery", - false); + opContext, ImmutableList.of(TestEntitySpecBuilder.getSpec()), "testQuery", false); BoolQueryBuilder mainQuery = (BoolQueryBuilder) result.query(); List shouldQueries = mainQuery.should(); assertEquals(shouldQueries.size(), 2); @@ -246,10 +243,7 @@ public void testCustomSelectAll() { FunctionScoreQueryBuilder result = (FunctionScoreQueryBuilder) TEST_CUSTOM_BUILDER.buildQuery( - mock(OperationContext.class), - ImmutableList.of(TestEntitySpecBuilder.getSpec()), - triggerQuery, - true); + opContext, ImmutableList.of(TestEntitySpecBuilder.getSpec()), triggerQuery, true); BoolQueryBuilder mainQuery = (BoolQueryBuilder) result.query(); List shouldQueries = mainQuery.should(); @@ -263,10 +257,7 @@ public void testCustomExactMatch() { FunctionScoreQueryBuilder result = (FunctionScoreQueryBuilder) TEST_CUSTOM_BUILDER.buildQuery( - mock(OperationContext.class), - ImmutableList.of(TestEntitySpecBuilder.getSpec()), - triggerQuery, - true); + opContext, ImmutableList.of(TestEntitySpecBuilder.getSpec()), triggerQuery, true); BoolQueryBuilder mainQuery = (BoolQueryBuilder) result.query(); List shouldQueries = mainQuery.should(); @@ -302,10 +293,7 @@ public void testCustomDefault() { FunctionScoreQueryBuilder result = (FunctionScoreQueryBuilder) TEST_CUSTOM_BUILDER.buildQuery( - mock(OperationContext.class), - ImmutableList.of(TestEntitySpecBuilder.getSpec()), - triggerQuery, - true); + opContext, ImmutableList.of(TestEntitySpecBuilder.getSpec()), triggerQuery, true); BoolQueryBuilder mainQuery = (BoolQueryBuilder) result.query(); List shouldQueries = mainQuery.should(); diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java index 60d1333be272d..5da970b46afc7 100644 --- 
a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SampleDataFixtureConfiguration.java @@ -315,6 +315,7 @@ private EntityClient entityClientHelper( null, null, null, - null); + null, + 1); } } diff --git a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java index d2bc670ac64a0..34598821f43fd 100644 --- a/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java +++ b/metadata-io/src/test/java/io/datahubproject/test/fixtures/search/SearchLineageFixtureConfiguration.java @@ -250,6 +250,7 @@ protected EntityClient entityClient( null, null, null, - null); + null, + 1); } } diff --git a/metadata-io/src/test/resources/search_config_test.yml b/metadata-io/src/test/resources/search_config_test.yml index 787d7f22de431..2ec81eddcab0e 100644 --- a/metadata-io/src/test/resources/search_config_test.yml +++ b/metadata-io/src/test/resources/search_config_test.yml @@ -53,3 +53,28 @@ queryConfigurations: weight: 1.5 score_mode: avg boost_mode: multiply + +autocompleteConfigurations: + - queryRegex: .* + defaultQuery: true + boolQuery: + must: + - term: + removed: 'false' + functionScore: + functions: + - filter: + match_all: {} + weight: 1 + - filter: + term: + materialized: + value: true + weight: 0.5 + - filter: + term: + deprecated: + value: false + weight: 1.5 + score_mode: avg + boost_mode: multiply \ No newline at end of file diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java index bb2a96693d214..b06b7df1846bd 100644 --- 
a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/event/EntityChangeEventGeneratorHookTest.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.kafka.hook.event; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.timeline.eventgenerator.ChangeEventGeneratorUtils.getSchemaFieldUrn; import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.*; @@ -36,6 +37,7 @@ import com.linkedin.dataprocess.DataProcessRunStatus; import com.linkedin.dataprocess.RunResultType; import com.linkedin.dataset.DatasetProperties; +import com.linkedin.dataset.EditableDatasetProperties; import com.linkedin.domain.Domains; import com.linkedin.entity.Aspect; import com.linkedin.entity.EntityResponse; @@ -43,29 +45,20 @@ import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.key.DatasetKey; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.timeline.data.ChangeCategory; import com.linkedin.metadata.timeline.data.ChangeOperation; -import com.linkedin.metadata.timeline.eventgenerator.AssertionRunEventChangeEventGenerator; -import com.linkedin.metadata.timeline.eventgenerator.DataProcessInstanceRunEventChangeEventGenerator; -import com.linkedin.metadata.timeline.eventgenerator.DeprecationChangeEventGenerator; -import com.linkedin.metadata.timeline.eventgenerator.EntityChangeEventGeneratorRegistry; -import com.linkedin.metadata.timeline.eventgenerator.EntityKeyChangeEventGenerator; -import com.linkedin.metadata.timeline.eventgenerator.GlobalTagsChangeEventGenerator; -import 
com.linkedin.metadata.timeline.eventgenerator.GlossaryTermsChangeEventGenerator; -import com.linkedin.metadata.timeline.eventgenerator.OwnershipChangeEventGenerator; -import com.linkedin.metadata.timeline.eventgenerator.SingleDomainChangeEventGenerator; -import com.linkedin.metadata.timeline.eventgenerator.StatusChangeEventGenerator; +import com.linkedin.metadata.timeline.eventgenerator.*; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.PlatformEvent; import com.linkedin.mxe.PlatformEventHeader; import com.linkedin.platform.event.v1.EntityChangeEvent; import com.linkedin.platform.event.v1.Parameters; +import com.linkedin.schema.*; import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; @@ -77,7 +70,7 @@ /** * Tests the {@link EntityChangeEventGeneratorHook}. * - *

TODO: Include Schema Field Tests, description update tests. + *

TODO: Include more Schema Field Tests for tags, terms and schema-changes. */ public class EntityChangeEventGeneratorHookTest { private static final long EVENT_TIME = 123L; @@ -96,14 +89,12 @@ public class EntityChangeEventGeneratorHookTest { private Urn actorUrn; private SystemEntityClient _mockClient; - private EntityService _mockEntityService; private EntityChangeEventGeneratorHook _entityChangeEventHook; @BeforeMethod public void setupTest() throws URISyntaxException { actorUrn = Urn.createFromString(TEST_ACTOR_URN); _mockClient = Mockito.mock(SystemEntityClient.class); - _mockEntityService = Mockito.mock(EntityService.class); EntityChangeEventGeneratorRegistry entityChangeEventGeneratorRegistry = createEntityChangeEventGeneratorRegistry(); _entityChangeEventHook = @@ -675,6 +666,381 @@ public void testInvokeIneligibleAspect() throws Exception { Mockito.verifyNoMoreInteractions(_mockClient); } + @Test + public void testDatasetPropertiesAdd() throws Exception { + final String newDescription = "New desc"; + MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(DATASET_ENTITY_NAME); + event.setAspectName(DATASET_PROPERTIES_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + event.setEntityUrn(Urn.createFromString(TEST_DATASET_URN)); + event.setCreated(new AuditStamp().setActor(actorUrn).setTime(EVENT_TIME)); + + event.setAspect( + GenericRecordUtils.serializeAspect(new DatasetProperties().setDescription(newDescription))); + _entityChangeEventHook.invoke(event); + + PlatformEvent platformEvent = + createChangeEvent( + DATASET_ENTITY_NAME, + Urn.createFromString(TEST_DATASET_URN), + ChangeCategory.DOCUMENTATION, + ChangeOperation.ADD, + null, + ImmutableMap.of("description", newDescription), + actorUrn); + verifyProducePlatformEvent(_mockClient, platformEvent); + } + + @Test + public void testDatasetDescriptionAdd() throws Exception { + final String newDescription = "New desc"; + MetadataChangeLog event = new MetadataChangeLog(); + 
event.setEntityType(DATASET_ENTITY_NAME); + event.setAspectName(DATASET_PROPERTIES_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + event.setEntityUrn(Urn.createFromString(TEST_DATASET_URN)); + event.setCreated(new AuditStamp().setActor(actorUrn).setTime(EVENT_TIME)); + + event.setAspect( + GenericRecordUtils.serializeAspect(new DatasetProperties().setDescription(newDescription))); + event.setPreviousAspectValue(GenericRecordUtils.serializeAspect(new DatasetProperties())); + + _entityChangeEventHook.invoke(event); + + PlatformEvent platformEvent = + createChangeEvent( + DATASET_ENTITY_NAME, + Urn.createFromString(TEST_DATASET_URN), + ChangeCategory.DOCUMENTATION, + ChangeOperation.ADD, + null, + ImmutableMap.of("description", newDescription), + actorUrn); + verifyProducePlatformEvent(_mockClient, platformEvent); + } + + @Test + public void testDatasetDescriptionModify() throws Exception { + final String newDescription = "New desc"; + MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(DATASET_ENTITY_NAME); + event.setAspectName(DATASET_PROPERTIES_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + event.setEntityUrn(Urn.createFromString(TEST_DATASET_URN)); + event.setCreated(new AuditStamp().setActor(actorUrn).setTime(EVENT_TIME)); + + event.setAspect( + GenericRecordUtils.serializeAspect(new DatasetProperties().setDescription(newDescription))); + event.setPreviousAspectValue( + GenericRecordUtils.serializeAspect(new DatasetProperties().setDescription("Old desc"))); + + _entityChangeEventHook.invoke(event); + + PlatformEvent platformEvent = + createChangeEvent( + DATASET_ENTITY_NAME, + Urn.createFromString(TEST_DATASET_URN), + ChangeCategory.DOCUMENTATION, + ChangeOperation.MODIFY, + null, + ImmutableMap.of("description", newDescription), + actorUrn); + verifyProducePlatformEvent(_mockClient, platformEvent); + } + + @Test + public void testDatasetDescriptionRemove() throws Exception { + final String oldDescription = "Old desc"; + 
MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(DATASET_ENTITY_NAME); + event.setAspectName(DATASET_PROPERTIES_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + event.setEntityUrn(Urn.createFromString(TEST_DATASET_URN)); + event.setCreated(new AuditStamp().setActor(actorUrn).setTime(EVENT_TIME)); + + event.setAspect(GenericRecordUtils.serializeAspect(new DatasetProperties())); + event.setPreviousAspectValue( + GenericRecordUtils.serializeAspect(new DatasetProperties().setDescription(oldDescription))); + + _entityChangeEventHook.invoke(event); + + PlatformEvent platformEvent = + createChangeEvent( + DATASET_ENTITY_NAME, + Urn.createFromString(TEST_DATASET_URN), + ChangeCategory.DOCUMENTATION, + ChangeOperation.REMOVE, + null, + ImmutableMap.of("description", oldDescription), + actorUrn); + verifyProducePlatformEvent(_mockClient, platformEvent); + } + + @Test + public void testEditableDatasetPropertiesAdd() throws Exception { + final String newDescription = "New desc"; + MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(DATASET_ENTITY_NAME); + event.setAspectName(EDITABLE_DATASET_PROPERTIES_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + event.setEntityUrn(Urn.createFromString(TEST_DATASET_URN)); + event.setCreated(new AuditStamp().setActor(actorUrn).setTime(EVENT_TIME)); + + event.setAspect( + GenericRecordUtils.serializeAspect( + new EditableDatasetProperties().setDescription(newDescription))); + _entityChangeEventHook.invoke(event); + + PlatformEvent platformEvent = + createChangeEvent( + DATASET_ENTITY_NAME, + Urn.createFromString(TEST_DATASET_URN), + ChangeCategory.DOCUMENTATION, + ChangeOperation.ADD, + null, + ImmutableMap.of("description", newDescription), + actorUrn); + verifyProducePlatformEvent(_mockClient, platformEvent); + } + + @Test + public void testEditableDatasetDescriptionAdd() throws Exception { + final String newDescription = "New desc"; + MetadataChangeLog event = new 
MetadataChangeLog(); + event.setEntityType(DATASET_ENTITY_NAME); + event.setAspectName(EDITABLE_DATASET_PROPERTIES_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + event.setEntityUrn(Urn.createFromString(TEST_DATASET_URN)); + event.setCreated(new AuditStamp().setActor(actorUrn).setTime(EVENT_TIME)); + + event.setAspect( + GenericRecordUtils.serializeAspect( + new EditableDatasetProperties().setDescription(newDescription))); + event.setPreviousAspectValue( + GenericRecordUtils.serializeAspect(new EditableDatasetProperties())); + + _entityChangeEventHook.invoke(event); + + PlatformEvent platformEvent = + createChangeEvent( + DATASET_ENTITY_NAME, + Urn.createFromString(TEST_DATASET_URN), + ChangeCategory.DOCUMENTATION, + ChangeOperation.ADD, + null, + ImmutableMap.of("description", newDescription), + actorUrn); + verifyProducePlatformEvent(_mockClient, platformEvent); + } + + @Test + public void testEditableDatasetDescriptionModify() throws Exception { + final String newDescription = "New desc"; + MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(DATASET_ENTITY_NAME); + event.setAspectName(EDITABLE_DATASET_PROPERTIES_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + event.setEntityUrn(Urn.createFromString(TEST_DATASET_URN)); + event.setCreated(new AuditStamp().setActor(actorUrn).setTime(EVENT_TIME)); + + event.setAspect( + GenericRecordUtils.serializeAspect( + new EditableDatasetProperties().setDescription(newDescription))); + event.setPreviousAspectValue( + GenericRecordUtils.serializeAspect( + new EditableDatasetProperties().setDescription("Old desc"))); + + _entityChangeEventHook.invoke(event); + + PlatformEvent platformEvent = + createChangeEvent( + DATASET_ENTITY_NAME, + Urn.createFromString(TEST_DATASET_URN), + ChangeCategory.DOCUMENTATION, + ChangeOperation.MODIFY, + null, + ImmutableMap.of("description", newDescription), + actorUrn); + verifyProducePlatformEvent(_mockClient, platformEvent); + } + + @Test + public void 
testEditableDatasetDescriptionRemove() throws Exception { + final String oldDescription = "Old desc"; + MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(DATASET_ENTITY_NAME); + event.setAspectName(EDITABLE_DATASET_PROPERTIES_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + event.setEntityUrn(Urn.createFromString(TEST_DATASET_URN)); + event.setCreated(new AuditStamp().setActor(actorUrn).setTime(EVENT_TIME)); + + event.setAspect(GenericRecordUtils.serializeAspect(new EditableDatasetProperties())); + event.setPreviousAspectValue( + GenericRecordUtils.serializeAspect( + new EditableDatasetProperties().setDescription(oldDescription))); + + _entityChangeEventHook.invoke(event); + + PlatformEvent platformEvent = + createChangeEvent( + DATASET_ENTITY_NAME, + Urn.createFromString(TEST_DATASET_URN), + ChangeCategory.DOCUMENTATION, + ChangeOperation.REMOVE, + null, + ImmutableMap.of("description", oldDescription), + actorUrn); + verifyProducePlatformEvent(_mockClient, platformEvent); + } + + @Test + public void testSchemaFieldDescriptionChanges() throws Exception { + MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(DATASET_ENTITY_NAME); + event.setAspectName(SCHEMA_METADATA_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + event.setEntityUrn(Urn.createFromString(TEST_DATASET_URN)); + event.setCreated(new AuditStamp().setActor(actorUrn).setTime(EVENT_TIME)); + + SchemaField field1 = new SchemaField().setNativeDataType("string").setFieldPath("c1"); + SchemaField field2 = new SchemaField().setNativeDataType("string").setFieldPath("c2"); + SchemaField field3 = new SchemaField().setNativeDataType("string").setFieldPath("c3"); + SchemaField field4 = new SchemaField().setNativeDataType("string").setFieldPath("c4"); + + SchemaFieldArray oldFields = new SchemaFieldArray(); + SchemaFieldArray newFields = new SchemaFieldArray(); + + oldFields.add(field1.clone()); + newFields.add(field1.clone().setDescription("c1Desc")); + 
+ oldFields.add(field2.clone().setDescription("oldC2Desc")); + newFields.add(field2.clone().setDescription("newC2Desc")); + + oldFields.add(field3.clone().setDescription("c3Desc")); + newFields.add(field3.clone()); + + oldFields.add(field4.clone().setDescription("c4Desc")); + newFields.add(field4.clone().setDescription("c4Desc")); + + event.setPreviousAspectValue( + GenericRecordUtils.serializeAspect(new SchemaMetadata().setFields(oldFields))); + event.setAspect(GenericRecordUtils.serializeAspect(new SchemaMetadata().setFields(newFields))); + + _entityChangeEventHook.invoke(event); + + verifyProducePlatformEvent( + _mockClient, + createChangeEvent( + SCHEMA_FIELD_ENTITY_NAME, + getSchemaFieldUrn(Urn.createFromString(TEST_DATASET_URN), "c1"), + ChangeCategory.DOCUMENTATION, + ChangeOperation.ADD, + null, + ImmutableMap.of("description", "c1Desc"), + actorUrn), + false); + + verifyProducePlatformEvent( + _mockClient, + createChangeEvent( + SCHEMA_FIELD_ENTITY_NAME, + getSchemaFieldUrn(Urn.createFromString(TEST_DATASET_URN), "c2"), + ChangeCategory.DOCUMENTATION, + ChangeOperation.MODIFY, + null, + ImmutableMap.of("description", "newC2Desc"), + actorUrn), + false); + + verifyProducePlatformEvent( + _mockClient, + createChangeEvent( + SCHEMA_FIELD_ENTITY_NAME, + getSchemaFieldUrn(Urn.createFromString(TEST_DATASET_URN), "c3"), + ChangeCategory.DOCUMENTATION, + ChangeOperation.REMOVE, + null, + ImmutableMap.of("description", "c3Desc"), + actorUrn), + true); + } + + @Test + public void testEditableSchemaFieldDescriptionChanges() throws Exception { + MetadataChangeLog event = new MetadataChangeLog(); + event.setEntityType(DATASET_ENTITY_NAME); + event.setAspectName(EDITABLE_SCHEMA_METADATA_ASPECT_NAME); + event.setChangeType(ChangeType.UPSERT); + event.setEntityUrn(Urn.createFromString(TEST_DATASET_URN)); + event.setCreated(new AuditStamp().setActor(actorUrn).setTime(EVENT_TIME)); + + EditableSchemaFieldInfo field1 = new EditableSchemaFieldInfo().setFieldPath("c1"); + 
EditableSchemaFieldInfo field2 = new EditableSchemaFieldInfo().setFieldPath("c2"); + EditableSchemaFieldInfo field3 = new EditableSchemaFieldInfo().setFieldPath("c3"); + EditableSchemaFieldInfo field4 = new EditableSchemaFieldInfo().setFieldPath("c4"); + + EditableSchemaFieldInfoArray oldFields = new EditableSchemaFieldInfoArray(); + EditableSchemaFieldInfoArray newFields = new EditableSchemaFieldInfoArray(); + + oldFields.add(field1.clone()); + newFields.add(field1.clone().setDescription("c1Desc")); + + oldFields.add(field2.clone().setDescription("oldC2Desc")); + newFields.add(field2.clone().setDescription("newC2Desc")); + + oldFields.add(field3.clone().setDescription("c3Desc")); + newFields.add(field3.clone()); + + oldFields.add(field4.clone().setDescription("c4Desc")); + newFields.add(field4.clone().setDescription("c4Desc")); + + event.setPreviousAspectValue( + GenericRecordUtils.serializeAspect( + new EditableSchemaMetadata().setEditableSchemaFieldInfo(oldFields))); + event.setAspect( + GenericRecordUtils.serializeAspect( + new EditableSchemaMetadata().setEditableSchemaFieldInfo(newFields))); + + _entityChangeEventHook.invoke(event); + + verifyProducePlatformEvent( + _mockClient, + createChangeEvent( + SCHEMA_FIELD_ENTITY_NAME, + getSchemaFieldUrn(Urn.createFromString(TEST_DATASET_URN), "c1"), + ChangeCategory.DOCUMENTATION, + ChangeOperation.ADD, + null, + ImmutableMap.of("description", "c1Desc"), + actorUrn), + false); + + verifyProducePlatformEvent( + _mockClient, + createChangeEvent( + SCHEMA_FIELD_ENTITY_NAME, + getSchemaFieldUrn(Urn.createFromString(TEST_DATASET_URN), "c2"), + ChangeCategory.DOCUMENTATION, + ChangeOperation.MODIFY, + null, + ImmutableMap.of("description", "newC2Desc"), + actorUrn), + false); + + verifyProducePlatformEvent( + _mockClient, + createChangeEvent( + SCHEMA_FIELD_ENTITY_NAME, + getSchemaFieldUrn(Urn.createFromString(TEST_DATASET_URN), "c3"), + ChangeCategory.DOCUMENTATION, + ChangeOperation.REMOVE, + null, + 
ImmutableMap.of("description", "c3Desc"), + actorUrn), + true); + } + private PlatformEvent createChangeEvent( String entityType, Urn entityUrn, @@ -714,8 +1080,13 @@ private EntityChangeEventGeneratorRegistry createEntityChangeEventGeneratorRegis registry.register(OWNERSHIP_ASPECT_NAME, new OwnershipChangeEventGenerator()); registry.register(STATUS_ASPECT_NAME, new StatusChangeEventGenerator()); registry.register(DEPRECATION_ASPECT_NAME, new DeprecationChangeEventGenerator()); - - // TODO Add Dataset Schema Field related change generators. + registry.register(DATASET_PROPERTIES_ASPECT_NAME, new DatasetPropertiesChangeEventGenerator()); + registry.register( + EDITABLE_DATASET_PROPERTIES_ASPECT_NAME, + new EditableDatasetPropertiesChangeEventGenerator()); + registry.register(SCHEMA_METADATA_ASPECT_NAME, new SchemaMetadataChangeEventGenerator()); + registry.register( + EDITABLE_SCHEMA_METADATA_ASPECT_NAME, new EditableSchemaMetadataChangeEventGenerator()); // Entity Lifecycle change event generators registry.register(DATASET_KEY_ASPECT_NAME, new EntityKeyChangeEventGenerator<>()); @@ -756,6 +1127,23 @@ private OperationContext createMockOperationContext() { AspectSpec mockDatasetKey = createMockAspectSpec(DatasetKey.class); Mockito.when(datasetSpec.getAspectSpec(eq(DATASET_KEY_ASPECT_NAME))).thenReturn(mockDatasetKey); + AspectSpec mockDatasetProperties = createMockAspectSpec(DatasetProperties.class); + Mockito.when(datasetSpec.getAspectSpec(eq(DATASET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockDatasetProperties); + + AspectSpec mockEditableDatasetProperties = + createMockAspectSpec(EditableDatasetProperties.class); + Mockito.when(datasetSpec.getAspectSpec(eq(EDITABLE_DATASET_PROPERTIES_ASPECT_NAME))) + .thenReturn(mockEditableDatasetProperties); + + AspectSpec mockSchemaMetadata = createMockAspectSpec(SchemaMetadata.class); + Mockito.when(datasetSpec.getAspectSpec(eq(SCHEMA_METADATA_ASPECT_NAME))) + .thenReturn(mockSchemaMetadata); + + AspectSpec 
mockEditableSchemaMetadata = createMockAspectSpec(EditableSchemaMetadata.class); + Mockito.when(datasetSpec.getAspectSpec(eq(EDITABLE_SCHEMA_METADATA_ASPECT_NAME))) + .thenReturn(mockEditableSchemaMetadata); + Mockito.when(registry.getEntitySpec(eq(DATASET_ENTITY_NAME))).thenReturn(datasetSpec); // Build Assertion Entity Spec diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java similarity index 91% rename from metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTest.java rename to metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java index 0b0f7c7bce031..c2a8de161eafe 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLGMSSpringTest.java @@ -17,7 +17,11 @@ import org.testng.annotations.Test; @SpringBootTest( - classes = {MCLSpringTestConfiguration.class, ConfigurationProvider.class}, + classes = { + MCLSpringCommonTestConfiguration.class, + MCLSpringGMSTestConfiguration.class, + ConfigurationProvider.class + }, properties = { "ingestionScheduler.enabled=false", "configEntityRegistry.path=../../metadata-jobs/mae-consumer/src/test/resources/test-entity-registry.yml", @@ -27,7 +31,7 @@ locations = "classpath:/application.yaml", properties = {"MCL_CONSUMER_ENABLED=true"}) @EnableAutoConfiguration(exclude = {CassandraAutoConfiguration.class}) -public class MCLSpringTest extends AbstractTestNGSpringContextTests { +public class MCLGMSSpringTest extends AbstractTestNGSpringContextTests { @Test public void testHooks() { diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java 
b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java new file mode 100644 index 0000000000000..23de7707cc571 --- /dev/null +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLMAESpringTest.java @@ -0,0 +1,56 @@ +package com.linkedin.metadata.kafka.hook.spring; + +import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertTrue; + +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.kafka.MetadataChangeLogProcessor; +import com.linkedin.metadata.kafka.hook.UpdateIndicesHook; +import com.linkedin.metadata.kafka.hook.event.EntityChangeEventGeneratorHook; +import com.linkedin.metadata.kafka.hook.incident.IncidentsSummaryHook; +import com.linkedin.metadata.kafka.hook.ingestion.IngestionSchedulerHook; +import com.linkedin.metadata.kafka.hook.siblings.SiblingAssociationHook; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.cassandra.CassandraAutoConfiguration; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.TestPropertySource; +import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; +import org.testng.annotations.Test; + +@SpringBootTest( + classes = {MCLSpringCommonTestConfiguration.class, ConfigurationProvider.class}, + properties = { + "entityClient.impl=restli", + "ingestionScheduler.enabled=false", + "configEntityRegistry.path=../../metadata-jobs/mae-consumer/src/test/resources/test-entity-registry.yml", + "kafka.schemaRegistry.type=INTERNAL" + }) +@TestPropertySource( + locations = "classpath:/application.yaml", + properties = {"MCL_CONSUMER_ENABLED=true"}) +@EnableAutoConfiguration(exclude = {CassandraAutoConfiguration.class}) +public class MCLMAESpringTest extends AbstractTestNGSpringContextTests { + + @Test + public void testHooks() { + 
MetadataChangeLogProcessor metadataChangeLogProcessor = + applicationContext.getBean(MetadataChangeLogProcessor.class); + assertTrue( + metadataChangeLogProcessor.getHooks().stream() + .noneMatch(hook -> hook instanceof IngestionSchedulerHook)); + assertTrue( + metadataChangeLogProcessor.getHooks().stream() + .anyMatch(hook -> hook instanceof UpdateIndicesHook)); + assertTrue( + metadataChangeLogProcessor.getHooks().stream() + .anyMatch(hook -> hook instanceof SiblingAssociationHook)); + assertTrue( + metadataChangeLogProcessor.getHooks().stream() + .anyMatch(hook -> hook instanceof EntityChangeEventGeneratorHook)); + assertEquals( + 1, + metadataChangeLogProcessor.getHooks().stream() + .filter(hook -> hook instanceof IncidentsSummaryHook) + .count()); + } +} diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java similarity index 96% rename from metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java rename to metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java index dac4e98b62513..2666f58de862e 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringCommonTestConfiguration.java @@ -7,7 +7,6 @@ import com.datahub.metadata.ingestion.IngestionScheduler; import com.linkedin.entity.client.SystemEntityClient; import com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener; -import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; import 
com.linkedin.metadata.search.elasticsearch.ElasticSearchService; @@ -39,7 +38,7 @@ "com.linkedin.gms.factory.entity.update.indices", "com.linkedin.gms.factory.timeline.eventgenerator" }) -public class MCLSpringTestConfiguration { +public class MCLSpringCommonTestConfiguration { @MockBean public EntityRegistry entityRegistry; @@ -58,8 +57,6 @@ public class MCLSpringTestConfiguration { @MockBean public ElasticSearchService searchService; - @MockBean public EntityService entityService; - @MockBean public FormService formService; @MockBean(name = "systemAuthentication") diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringGMSTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringGMSTestConfiguration.java new file mode 100644 index 0000000000000..b7c5433ae1592 --- /dev/null +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringGMSTestConfiguration.java @@ -0,0 +1,8 @@ +package com.linkedin.metadata.kafka.hook.spring; + +import com.linkedin.metadata.entity.EntityService; +import org.springframework.boot.test.mock.mockito.MockBean; + +public class MCLSpringGMSTestConfiguration { + @MockBean EntityService entityService; +} diff --git a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java index 7b3716a894683..30bfeadb021a7 100644 --- a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java +++ b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java @@ -8,7 +8,7 @@ import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; import io.datahubproject.metadata.context.OperationContext; import 
io.datahubproject.metadata.jobs.common.health.kafka.KafkaHealthIndicator; -import java.util.stream.Stream; +import java.util.List; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.boot.test.web.client.TestRestTemplate; @@ -32,8 +32,8 @@ public class MceConsumerApplicationTest extends AbstractTestNGSpringContextTests public void testRestliServletConfig() { RestoreIndicesResult mockResult = new RestoreIndicesResult(); mockResult.setRowsMigrated(100); - when(_mockEntityService.streamRestoreIndices(any(OperationContext.class), any(), any())) - .thenReturn(Stream.of(mockResult)); + when(_mockEntityService.restoreIndices(any(OperationContext.class), any(), any())) + .thenReturn(List.of(mockResult)); String response = this.restTemplate.postForObject( diff --git a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java index feb3869abd391..08ff802c37e40 100644 --- a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java +++ b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java @@ -46,7 +46,8 @@ public SystemEntityClient systemEntityClient( restClient, new ExponentialBackoff(1), 1, - configurationProvider.getCache().getClient().getEntityClient()); + configurationProvider.getCache().getClient().getEntityClient(), + 1); } @MockBean public Database ebeanServer; diff --git a/metadata-models-custom/README.md b/metadata-models-custom/README.md index d0274f2bc4e0e..1d26251bc13c9 100644 --- a/metadata-models-custom/README.md +++ b/metadata-models-custom/README.md @@ -48,7 +48,7 @@ Change your directory to the metadata-models-custom folder and then run this com This will create 
a zip file in the build/dist folder. Then change your directory back to the main datahub folder and run ``` -./gradlew :metadata-models-custom:install +./gradlew :metadata-models-custom:modelDeploy ``` This will install the zip file as a datahub plugin. It is installed at `~/.datahub/plugins/models/` and if you list the directory you should see the following path if you are following the customDataQualityRules implementation example: `~/.datahub/plugins/models/mycompany-dq-model/0.0.0-dev/` @@ -467,6 +467,73 @@ plugins: aspectName: customDataQualityRules ``` +#### Spring Support + +Validators, mutators, and side-effects can also utilize Spring to inject dependencies and autoconfigure them. While Spring is +not required, it is possible to use Spring to both inject autoconfiguration and the plugins themselves. An example Spring-enabled +validator has been included in the package `com.linkedin.metadata.aspect.plugins.spring.validation`. The plugin +class loader and Spring context are isolated so conflicts between DataHub and custom classes are avoided. + +The configuration of a Spring-enabled plugin looks like the following; note the addition of `spring.enabled: true` below. +A list of packages to scan for Spring configuration and components should also be provided; it should include +your custom classes with Spring annotations per the `packageScan` below.
+ +```yaml +plugins: + aspectPayloadValidators: + - className: 'com.linkedin.metadata.aspect.plugins.spring.validation.CustomDataQualityRulesValidator' + packageScan: + - com.linkedin.metadata.aspect.plugins.spring.validation + enabled: true + supportedOperations: + - UPSERT + supportedEntityAspectNames: + - entityName: 'dataset' + aspectName: customDataQualityRules + spring: + enabled: true +``` + +In the Spring example, a configuration component called `CustomDataQualityRulesConfig` provides a string `Spring injection works!` demonstrating +injection of a bean into a function which is called by Spring after constructing the custom validator plugin. + +```java +@Configuration +public class CustomDataQualityRulesConfig { + @Bean("myCustomMessage") + public String myCustomMessage() { + return "Spring injection works!"; + } +} +``` + +```java +@Component +@Import(CustomDataQualityRulesConfig.class) +public class CustomDataQualityRulesValidator extends AspectPayloadValidator { + @Autowired + @Qualifier("myCustomMessage") + private String myCustomMessage; + + @PostConstruct + public void message() { + System.out.println(myCustomMessage); + } + + // ... +} +``` + +Example Log: + +``` +INFO c.l.m.m.r.PluginEntityRegistryLoader:187 - com.linkedin.metadata.models.registry.PluginEntityRegistryLoader@144e466d: Registry mycompany-dq-model:0.0.0-dev discovered. Loading... +INFO c.l.m.m.registry.PatchEntityRegistry:143 - Loading custom config entity file: /etc/datahub/plugins/models/mycompany-dq-model/0.0.0-dev/entity-registry.yaml, dir: /etc/datahub/plugins/models/mycompany-dq-model/0.0.0-dev +INFO c.l.m.m.registry.PatchEntityRegistry:143 - Loading custom config entity file: /etc/datahub/plugins/models/mycompany-dq-model/0.0.0-dev/entity-registry.yaml, dir: /etc/datahub/plugins/models/mycompany-dq-model/0.0.0-dev +Spring injection works! +``` + + ## The Future Hopefully this repository shows you how easily you can extend and customize DataHub's metadata model! 
diff --git a/metadata-models-custom/build.gradle b/metadata-models-custom/build.gradle index 8bf9d3b2f491e..412c19194c733 100644 --- a/metadata-models-custom/build.gradle +++ b/metadata-models-custom/build.gradle @@ -14,7 +14,7 @@ buildscript { } plugins { - id 'java-library' + id 'java' id 'maven-publish' id 'pegasus' } @@ -27,17 +27,14 @@ if (project.hasProperty('projVersion')) { dependencies { implementation spec.product.pegasus.data - // Uncomment these if you want to depend on models defined in core datahub - // DataQualityRuleEvent in this example uses Urn and TimeseriesAspectBase - implementation project(':li-utils') - dataModel project(':li-utils') - implementation project(':metadata-models') - dataModel project(':metadata-models') - - // Required for custom code plugins - implementation project(':entity-registry') - // Required for MCL/MCP hooks - implementation project (':metadata-io') + + // Core DataHub dependencies + implementation project(path: ':metadata-integration:java:custom-plugin-lib', configuration: 'shadow') + // DataModel DataHub dependencies + dataModel project(path: ':metadata-integration:java:custom-plugin-lib', configuration: 'shadow') + + // Required for Spring-enabled plugins only + implementation externalDependency.springBootAutoconfigure } def deployBaseDir = findProperty('pluginModelsDir') ?: file(project.gradle.gradleUserHomeDir.parent + "/.datahub/plugins/models") diff --git a/metadata-models-custom/registry/entity-registry.yaml b/metadata-models-custom/registry/entity-registry.yaml index e6180172837e0..70790bafb4ef5 100644 --- a/metadata-models-custom/registry/entity-registry.yaml +++ b/metadata-models-custom/registry/entity-registry.yaml @@ -16,6 +16,17 @@ plugins: supportedEntityAspectNames: - entityName: 'dataset' aspectName: customDataQualityRules + - className: 'com.linkedin.metadata.aspect.plugins.spring.validation.CustomDataQualityRulesValidator' + enabled: true + supportedOperations: + - UPSERT + 
supportedEntityAspectNames: + - entityName: 'dataset' + aspectName: customDataQualityRules + spring: + enabled: true + packageScan: + - com.linkedin.metadata.aspect.plugins.spring.validation mutationHooks: - className: 'com.linkedin.metadata.aspect.plugins.hooks.CustomDataQualityRulesMutator' enabled: true diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java index 7960ec5fa6322..e8783541c9916 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCLSideEffect.java @@ -14,9 +14,7 @@ public class CustomDataQualityRulesMCLSideEffect extends MCLSideEffect { - public CustomDataQualityRulesMCLSideEffect(AspectPluginConfig config) { - super(config); - } + private AspectPluginConfig config; @Override protected Stream applyMCLSideEffect( @@ -69,4 +67,16 @@ private Optional buildEvent(MetadataChangeLog originMCP) { return Optional.empty(); } + + @Nonnull + @Override + public AspectPluginConfig getConfig() { + return config; + } + + @Override + public CustomDataQualityRulesMCLSideEffect setConfig(@Nonnull AspectPluginConfig config) { + this.config = config; + return this; + } } diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java index 193c56d904e99..de9d3419c216e 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java +++ 
b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMCPSideEffect.java @@ -12,9 +12,7 @@ public class CustomDataQualityRulesMCPSideEffect extends MCPSideEffect { - public CustomDataQualityRulesMCPSideEffect(AspectPluginConfig aspectPluginConfig) { - super(aspectPluginConfig); - } + private AspectPluginConfig config; @Override protected Stream applyMCPSideEffect( @@ -34,4 +32,16 @@ protected Stream applyMCPSideEffect( .build(retrieverContext.getAspectRetriever()); }); } + + @Nonnull + @Override + public AspectPluginConfig getConfig() { + return config; + } + + @Override + public CustomDataQualityRulesMCPSideEffect setConfig(@Nonnull AspectPluginConfig config) { + this.config = config; + return this; + } } diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java index 9b48ed2c9975c..6b2f31a505c62 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/hooks/CustomDataQualityRulesMutator.java @@ -13,9 +13,7 @@ public class CustomDataQualityRulesMutator extends MutationHook { - public CustomDataQualityRulesMutator(AspectPluginConfig config) { - super(config); - } + private AspectPluginConfig config; @Override protected Stream> writeMutation( @@ -43,4 +41,16 @@ protected Stream> writeMutation( .filter(Objects::nonNull) .map(changeMCP -> Pair.of(changeMCP, true)); } + + @Nonnull + @Override + public AspectPluginConfig getConfig() { + return config; + } + + @Override + public CustomDataQualityRulesMutator setConfig(@Nonnull AspectPluginConfig config) { + this.config = config; + return this; + } } diff --git 
a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesConfig.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesConfig.java new file mode 100644 index 0000000000000..f6dbaa2c7fcbe --- /dev/null +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesConfig.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.aspect.plugins.spring.validation; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class CustomDataQualityRulesConfig { + @Bean("myCustomMessage") + public String myCustomMessage() { + return "Spring injection works!"; + } +} diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java new file mode 100644 index 0000000000000..3cd1acaf9645d --- /dev/null +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/spring/validation/CustomDataQualityRulesValidator.java @@ -0,0 +1,111 @@ +package com.linkedin.metadata.aspect.plugins.spring.validation; + +import com.linkedin.metadata.aspect.RetrieverContext; +import com.linkedin.metadata.aspect.batch.BatchItem; +import com.linkedin.metadata.aspect.batch.ChangeMCP; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.validation.AspectPayloadValidator; +import com.linkedin.metadata.aspect.plugins.validation.AspectValidationException; +import com.mycompany.dq.DataQualityRules; +import java.util.Collection; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import 
javax.annotation.Nonnull; +import javax.annotation.PostConstruct; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Import; +import org.springframework.stereotype.Component; + +/** + * Same as the non-Spring example however this is an example of using Spring to inject the plugins. + * + *

This also allows use of other Spring enabled libraries + */ +@Component +@Import(CustomDataQualityRulesConfig.class) +public class CustomDataQualityRulesValidator extends AspectPayloadValidator { + @Autowired + @Qualifier("myCustomMessage") + private String myCustomMessage; + + private AspectPluginConfig config; + + @PostConstruct + public void message() { + System.out.println(myCustomMessage); + } + + @Override + protected Stream validateProposedAspects( + @Nonnull Collection mcpItems, + @Nonnull RetrieverContext retrieverContext) { + + return mcpItems.stream() + .map( + item -> { + DataQualityRules rules = new DataQualityRules(item.getRecordTemplate().data()); + // Enforce at least 1 rule + return rules.getRules().isEmpty() + ? new AspectValidationException( + item.getUrn(), item.getAspectName(), "At least one rule is required.") + : null; + }) + .filter(Objects::nonNull); + } + + @Override + protected Stream validatePreCommitAspects( + @Nonnull Collection changeMCPs, @Nonnull RetrieverContext retrieverContext) { + return changeMCPs.stream() + .flatMap( + changeMCP -> { + if (changeMCP.getPreviousSystemAspect() != null) { + DataQualityRules oldRules = changeMCP.getPreviousAspect(DataQualityRules.class); + DataQualityRules newRules = changeMCP.getAspect(DataQualityRules.class); + + Map newFieldTypeMap = + newRules.getRules().stream() + .filter(rule -> rule.getField() != null) + .map(rule -> Map.entry(rule.getField(), rule.getType())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + // Ensure the old and new field type is the same + return oldRules.getRules().stream() + .map( + oldRule -> { + if (!newFieldTypeMap + .getOrDefault(oldRule.getField(), oldRule.getType()) + .equals(oldRule.getType())) { + return new AspectValidationException( + changeMCP.getUrn(), + changeMCP.getAspectName(), + String.format( + "Field type mismatch. 
Field: %s Old: %s New: %s", + oldRule.getField(), + oldRule.getType(), + newFieldTypeMap.get(oldRule.getField()))); + } + return null; + }) + .filter(Objects::nonNull); + } + + return Stream.empty(); + }); + } + + @Nonnull + @Override + public AspectPluginConfig getConfig() { + return config; + } + + @Override + public CustomDataQualityRulesValidator setConfig(@Nonnull AspectPluginConfig config) { + this.config = config; + return this; + } +} diff --git a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java index 7a785bfbd1abb..b95d3381d9c8f 100644 --- a/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java +++ b/metadata-models-custom/src/main/java/com/linkedin/metadata/aspect/plugins/validation/CustomDataQualityRulesValidator.java @@ -14,9 +14,7 @@ public class CustomDataQualityRulesValidator extends AspectPayloadValidator { - public CustomDataQualityRulesValidator(AspectPluginConfig config) { - super(config); - } + private AspectPluginConfig config; @Override protected Stream validateProposedAspects( @@ -75,4 +73,16 @@ protected Stream validatePreCommitAspects( return Stream.empty(); }); } + + @Nonnull + @Override + public AspectPluginConfig getConfig() { + return config; + } + + @Override + public CustomDataQualityRulesValidator setConfig(@Nonnull AspectPluginConfig config) { + this.config = config; + return this; + } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl index e161270145a88..5b60aa18e87da 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/assertion/AssertionInfo.pdl @@ -37,6 +37,11 @@ record 
AssertionInfo includes CustomProperties, ExternalReference { */ SQL + /** + * A structured assertion targeting a specific column or field of the Dataset. + */ + FIELD + /** * A schema or structural assertion. * diff --git a/metadata-models/src/main/pegasus/com/linkedin/connection/DataHubConnectionDetails.pdl b/metadata-models/src/main/pegasus/com/linkedin/connection/DataHubConnectionDetails.pdl new file mode 100644 index 0000000000000..81f57abf2dac4 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/connection/DataHubConnectionDetails.pdl @@ -0,0 +1,38 @@ +namespace com.linkedin.connection + +import com.linkedin.common.Urn + +/** + * Information about a connection to an external platform. + */ +@Aspect = { + "name": "dataHubConnectionDetails" +} +record DataHubConnectionDetails { + /** + * The type of the connection. This defines the schema / encoding of the connection details. + */ + @Searchable = {} + type: enum DataHubConnectionDetailsType { + /** + * A json-encoded set of connection details + */ + JSON + } + + /** + * Display name of the connection + */ + @Searchable = { + "fieldType": "TEXT_PARTIAL", + "enableAutocomplete": true, + "boostScore": 10.0 + } + name: optional string + + /** + * A JSON payload containing raw connection details. + * This will be present if the type is JSON. + */ + json: optional DataHubJsonConnection +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/connection/DataHubJsonConnection.pdl b/metadata-models/src/main/pegasus/com/linkedin/connection/DataHubJsonConnection.pdl new file mode 100644 index 0000000000000..996e2a3238bd5 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/connection/DataHubJsonConnection.pdl @@ -0,0 +1,11 @@ +namespace com.linkedin.connection + +/** + * A set of connection details consisting of an encrypted JSON blob. + */ +record DataHubJsonConnection { + /** + * The encrypted JSON connection details.
+ */ + encryptedBlob: string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/execution/ExecutionRequestInput.pdl b/metadata-models/src/main/pegasus/com/linkedin/execution/ExecutionRequestInput.pdl index 4bcbd81caa2a1..0afda2460924b 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/execution/ExecutionRequestInput.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/execution/ExecutionRequestInput.pdl @@ -1,5 +1,7 @@ namespace com.linkedin.execution +import com.linkedin.common.Urn + /** * An request to execution some remote logic or action. * TODO: Determine who is responsible for emitting execution request success or failure. Executor? @@ -37,4 +39,12 @@ record ExecutionRequestInput { "queryByDefault": false } requestedAt: long + + /** + * Urn of the actor who created this execution request. + */ + @Searchable = { + "fieldType": "URN" + } + actorUrn: optional Urn } \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubConnectionKey.pdl b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubConnectionKey.pdl new file mode 100644 index 0000000000000..cd851d8382759 --- /dev/null +++ b/metadata-models/src/main/pegasus/com/linkedin/metadata/key/DataHubConnectionKey.pdl @@ -0,0 +1,15 @@ +namespace com.linkedin.metadata.key + +/** + * Key for a Connection + */ +@Aspect = { + "name": "dataHubConnectionKey" +} +record DataHubConnectionKey { + /** + * A unique identifier for the connection. 
+ */ + @Searchable = {} + id: string +} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl index 1b263b679531a..178d7b3cf4376 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/structured/StructuredPropertyDefinition.pdl @@ -70,5 +70,13 @@ record StructuredPropertyDefinition { * from the logical type. */ searchConfiguration: optional DataHubSearchConfig + + /** + * Whether the structured property value is immutable once applied to an entity. + */ + @Searchable = { + "fieldType": "BOOLEAN" + } + immutable: boolean = false } diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml index d7ab1f948b411..60ef05ea55b2c 100644 --- a/metadata-models/src/main/resources/entity-registry.yml +++ b/metadata-models/src/main/resources/entity-registry.yml @@ -570,6 +570,12 @@ entities: - formInfo - dynamicFormAssignment - ownership + - name: dataHubConnection + category: internal + keyAspect: dataHubConnectionKey + aspects: + - dataHubConnectionDetails + - dataPlatformInstance events: plugins: aspectPayloadValidators: @@ -588,6 +594,7 @@ plugins: supportedOperations: - CREATE - UPSERT + - DELETE supportedEntityAspectNames: - entityName: '*' aspectName: structuredProperties diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ObjectMapperContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ObjectMapperContext.java new file mode 100644 index 0000000000000..2e96e48338a66 --- /dev/null +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/ObjectMapperContext.java @@ -0,0 +1,48 @@ +package io.datahubproject.metadata.context; + +import 
com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.StreamReadConstraints; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.metadata.Constants; +import java.util.Optional; +import javax.annotation.Nonnull; +import lombok.Builder; +import lombok.Getter; + +@Getter +@Builder +public class ObjectMapperContext implements ContextInterface { + + public static ObjectMapper defaultMapper = new ObjectMapper(); + + static { + defaultMapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); + int maxSize = + Integer.parseInt( + System.getenv() + .getOrDefault( + Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH, + Constants.MAX_JACKSON_STRING_SIZE)); + defaultMapper + .getFactory() + .setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); + } + + public static ObjectMapperContext DEFAULT = ObjectMapperContext.builder().build(); + + @Nonnull private final ObjectMapper objectMapper; + + @Override + public Optional getCacheKeyComponent() { + return Optional.empty(); + } + + public static class ObjectMapperContextBuilder { + public ObjectMapperContext build() { + if (this.objectMapper == null) { + objectMapper(defaultMapper); + } + return new ObjectMapperContext(this.objectMapper); + } + } +} diff --git a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java index 2e4da5abe7f82..56247d61337e8 100644 --- a/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java +++ b/metadata-operation-context/src/main/java/io/datahubproject/metadata/context/OperationContext.java @@ -2,6 +2,7 @@ import com.datahub.authentication.Authentication; import com.datahub.plugins.auth.authorization.Authorizer; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableSet; import 
com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -120,6 +121,24 @@ public static OperationContext asSystem( @Nullable ServicesRegistryContext servicesRegistryContext, @Nullable IndexConvention indexConvention, @Nullable RetrieverContext retrieverContext) { + return asSystem( + config, + systemAuthentication, + entityRegistry, + servicesRegistryContext, + indexConvention, + retrieverContext, + ObjectMapperContext.DEFAULT); + } + + public static OperationContext asSystem( + @Nonnull OperationContextConfig config, + @Nonnull Authentication systemAuthentication, + @Nullable EntityRegistry entityRegistry, + @Nullable ServicesRegistryContext servicesRegistryContext, + @Nullable IndexConvention indexConvention, + @Nullable RetrieverContext retrieverContext, + @Nonnull ObjectMapperContext objectMapperContext) { ActorContext systemActorContext = ActorContext.builder().systemAuth(true).authentication(systemAuthentication).build(); @@ -139,6 +158,7 @@ public static OperationContext asSystem( // Authorizer.EMPTY doesn't actually apply to system auth .authorizerContext(AuthorizerContext.builder().authorizer(Authorizer.EMPTY).build()) .retrieverContext(retrieverContext) + .objectMapperContext(objectMapperContext) .build(systemAuthentication); } @@ -152,6 +172,7 @@ public static OperationContext asSystem( @Nullable private final RequestContext requestContext; @Nullable private final ViewAuthorizationContext viewAuthorizationContext; @Nullable private final RetrieverContext retrieverContext; + @Nonnull private final ObjectMapperContext objectMapperContext; public OperationContext withSearchFlags( @Nonnull Function flagDefaults) { @@ -298,6 +319,7 @@ public String getGlobalContextId() { getRetrieverContext().isPresent() ? 
getRetrieverContext().get() : EmptyContext.EMPTY) + .add(getObjectMapperContext()) .build() .stream() .map(ContextInterface::getCacheKeyComponent) @@ -360,6 +382,11 @@ public String getRequestID() { return Optional.ofNullable(requestContext).map(RequestContext::getRequestID).orElse(""); } + @Nonnull + public ObjectMapper getObjectMapper() { + return objectMapperContext.getObjectMapper(); + } + public static class OperationContextBuilder { @Nonnull @@ -392,7 +419,10 @@ public OperationContext build(@Nonnull ActorContext sessionActor) { this.servicesRegistryContext, this.requestContext, this.viewAuthorizationContext, - this.retrieverContext); + this.retrieverContext, + this.objectMapperContext != null + ? this.objectMapperContext + : ObjectMapperContext.DEFAULT); } private OperationContext build() { diff --git a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java index bb6aa1ed231d7..71eaca71a3641 100644 --- a/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java +++ b/metadata-service/auth-servlet-impl/src/main/java/com/datahub/auth/authentication/AuthServiceController.java @@ -123,9 +123,7 @@ CompletableFuture> generateSessionTokenForUser( try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error( - String.format( - "Failed to parse json while attempting to generate session token %s", jsonStr)); + log.error("Failed to parse json while attempting to generate session token {}", jsonStr, e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { @@ -139,7 +137,7 @@ CompletableFuture> generateSessionTokenForUser( return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } - log.debug(String.format("Attempting to generate session 
token for user %s", userId.asText())); + log.info("Attempting to generate session token for user {}", userId.asText()); final String actorId = AuthenticationContext.getAuthentication().getActor().getId(); return CompletableFuture.supplyAsync( () -> { @@ -147,14 +145,20 @@ CompletableFuture> generateSessionTokenForUser( if (isAuthorizedToGenerateSessionToken(actorId)) { try { // 2. Generate a new DataHub JWT + final long sessionTokenDurationMs = + _configProvider.getAuthentication().getSessionTokenDurationMs(); final String token = _statelessTokenService.generateAccessToken( TokenType.SESSION, new Actor(ActorType.USER, userId.asText()), - _configProvider.getAuthentication().getSessionTokenDurationMs()); + sessionTokenDurationMs); + log.info( + "Successfully generated session token for user: {}, duration: {} ms", + userId.asText(), + sessionTokenDurationMs); return new ResponseEntity<>(buildTokenResponse(token), HttpStatus.OK); } catch (Exception e) { - log.error("Failed to generate session token for user", e); + log.error("Failed to generate session token for user: {}", userId.asText(), e); return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR); } } @@ -189,8 +193,7 @@ CompletableFuture> signUp(final HttpEntity httpEn try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error( - String.format("Failed to parse json while attempting to create native user %s", jsonStr)); + log.debug("Failed to parse json while attempting to create native user", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { @@ -229,13 +232,13 @@ CompletableFuture> signUp(final HttpEntity httpEn String passwordString = password.asText(); String inviteTokenString = inviteToken.asText(); Authentication auth = AuthenticationContext.getAuthentication(); - log.debug(String.format("Attempting to create native user %s", userUrnString)); + log.info("Attempting to create native user {}", 
userUrnString); return CompletableFuture.supplyAsync( () -> { try { Urn inviteTokenUrn = _inviteTokenService.getInviteTokenUrn(inviteTokenString); if (!_inviteTokenService.isInviteTokenValid(systemOperationContext, inviteTokenUrn)) { - log.error(String.format("Invalid invite token %s", inviteTokenString)); + log.error("Invalid invite token {}", inviteTokenString); return new ResponseEntity<>(HttpStatus.BAD_REQUEST); } @@ -247,10 +250,10 @@ CompletableFuture> signUp(final HttpEntity httpEn titleString, passwordString); String response = buildSignUpResponse(); + log.info("Created native user {}", userUrnString); return new ResponseEntity<>(response, HttpStatus.OK); } catch (Exception e) { - log.error( - String.format("Failed to create credentials for native user %s", userUrnString), e); + log.error("Failed to create credentials for native user {}", userUrnString, e); return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR); } }); @@ -279,8 +282,7 @@ CompletableFuture> resetNativeUserCredentials( try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error( - String.format("Failed to parse json while attempting to create native user %s", jsonStr)); + log.debug("Failed to parse json while attempting to create native user", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { @@ -300,17 +302,17 @@ CompletableFuture> resetNativeUserCredentials( String passwordString = password.asText(); String resetTokenString = resetToken.asText(); Authentication auth = AuthenticationContext.getAuthentication(); - log.debug(String.format("Attempting to reset credentials for native user %s", userUrnString)); + log.info("Attempting to reset credentials for native user {}", userUrnString); return CompletableFuture.supplyAsync( () -> { try { _nativeUserService.resetCorpUserCredentials( systemOperationContext, userUrnString, passwordString, resetTokenString); String response = 
buildResetNativeUserCredentialsResponse(); + log.info("Reset credentials for native user {}", userUrnString); return new ResponseEntity<>(response, HttpStatus.OK); } catch (Exception e) { - log.error( - String.format("Failed to reset credentials for native user %s", userUrnString), e); + log.error("Failed to reset credentials for native user {}", userUrnString, e); return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR); } }); @@ -338,9 +340,7 @@ CompletableFuture> verifyNativeUserCredentials( try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error( - String.format( - "Failed to parse json while attempting to verify native user password %s", jsonStr)); + log.debug("Failed to parse json while attempting to verify native user password", e); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { @@ -357,7 +357,7 @@ CompletableFuture> verifyNativeUserCredentials( String userUrnString = userUrn.asText(); String passwordString = password.asText(); - log.debug(String.format("Attempting to verify credentials for native user %s", userUrnString)); + log.info("Attempting to verify credentials for native user {}", userUrnString); return CompletableFuture.supplyAsync( () -> { try { @@ -365,10 +365,13 @@ CompletableFuture> verifyNativeUserCredentials( _nativeUserService.doesPasswordMatch( systemOperationContext, userUrnString, passwordString); String response = buildVerifyNativeUserPasswordResponse(doesPasswordMatch); + log.info( + "Verified credentials for native user: {}, result: {}", + userUrnString, + doesPasswordMatch); return new ResponseEntity<>(response, HttpStatus.OK); } catch (Exception e) { - log.error( - String.format("Failed to verify credentials for native user %s", userUrnString), e); + log.error("Failed to verify credentials for native user {}", userUrnString, e); return new ResponseEntity<>(HttpStatus.INTERNAL_SERVER_ERROR); } }); @@ -383,9 +386,7 @@ 
CompletableFuture> track(final HttpEntity httpEnt try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error( - String.format( - "Failed to parse json while attempting to track analytics event %s", jsonStr)); + log.error("Failed to parse json while attempting to track analytics event {}", jsonStr); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } if (bodyJson == null) { diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java index 2a492c746cc9c..c8925581899c4 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java @@ -30,9 +30,15 @@ public CustomSearchConfiguration resolve(ObjectMapper mapper) throws IOException log.info("Custom search configuration found in classpath: {}", file); return mapper.readValue(stream, CustomSearchConfiguration.class); } catch (FileNotFoundException e) { + log.info("Custom search configuration was NOT found in the classpath."); try (InputStream stream = new FileSystemResource(file).getInputStream()) { log.info("Custom search configuration found in filesystem: {}", file); return mapper.readValue(stream, CustomSearchConfiguration.class); + } catch (Exception e2) { + log.warn( + "Custom search enabled, however there was an error loading configuration: " + file, + e2); + return null; } } } else { diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/AutocompleteConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/AutocompleteConfiguration.java new file mode 100644 index 0000000000000..6a7565c2a55cf --- /dev/null +++ 
b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/AutocompleteConfiguration.java @@ -0,0 +1,34 @@ +package com.linkedin.metadata.config.search.custom; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import java.util.Collections; +import java.util.Map; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +@Builder(toBuilder = true) +@Getter +@ToString +@EqualsAndHashCode +@JsonDeserialize(builder = AutocompleteConfiguration.AutocompleteConfigurationBuilder.class) +public class AutocompleteConfiguration { + // match this configuration based on query string regex match + private String queryRegex; + // include the default autocomplete query + @Builder.Default private boolean defaultQuery = true; + // override or extend default autocomplete query + private BoolQueryConfiguration boolQuery; + // inherit the query configuration's function score (disabled if functionScore exists) + @Builder.Default private boolean inheritFunctionScore = true; + + // additional function scores to apply for ranking + @Builder.Default private Map functionScore = Collections.emptyMap(); + + @JsonPOJOBuilder(withPrefix = "") + public static class AutocompleteConfigurationBuilder {} +} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java index e6756ca8f0da8..d2a908050cf30 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java @@ -2,6 +2,7 @@ import 
com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import java.util.Collections; import java.util.List; import lombok.Builder; import lombok.EqualsAndHashCode; @@ -13,7 +14,10 @@ @JsonDeserialize(builder = CustomSearchConfiguration.CustomSearchConfigurationBuilder.class) public class CustomSearchConfiguration { - private List queryConfigurations; + @Builder.Default private List queryConfigurations = Collections.emptyList(); + + @Builder.Default + private List autocompleteConfigurations = Collections.emptyList(); @JsonPOJOBuilder(withPrefix = "") public static class CustomSearchConfigurationBuilder {} diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java index e3a9d076dbef2..c1801b0947016 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java @@ -2,6 +2,7 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import java.util.Collections; import java.util.Map; import lombok.Builder; import lombok.EqualsAndHashCode; @@ -29,7 +30,7 @@ public class QueryConfiguration { @Builder.Default private boolean exactMatchQuery = true; @Builder.Default private boolean prefixMatchQuery = true; private BoolQueryConfiguration boolQuery; - private Map functionScore; + @Builder.Default private Map functionScore = Collections.emptyMap(); @JsonPOJOBuilder(withPrefix = "") public static class QueryConfigurationBuilder {} diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml 
index c6397c3ce5abb..5991e413a43c8 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -380,6 +380,12 @@ views: entityClient: retryInterval: ${ENTITY_CLIENT_RETRY_INTERVAL:2} numRetries: ${ENTITY_CLIENT_NUM_RETRIES:3} + java: + get: + batchSize: ${ENTITY_CLIENT_JAVA_GET_BATCH_SIZE:375} # matches EbeanAspectDao batch size + restli: + get: + batchSize: ${ENTITY_CLIENT_RESTLI_GET_BATCH_SIZE:100} # limited to prevent exceeding restli URI size limit usageClient: retryInterval: ${USAGE_CLIENT_RETRY_INTERVAL:2} @@ -437,6 +443,7 @@ graphQL: query: complexityLimit: ${GRAPHQL_QUERY_COMPLEXITY_LIMIT:2000} depthLimit: ${GRAPHQL_QUERY_DEPTH_LIMIT:50} + introspectionEnabled: ${GRAPHQL_QUERY_INTROSPECTION_ENABLED:true} springdoc.api-docs.groups.enabled: true diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/connection/ConnectionServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/connection/ConnectionServiceFactory.java new file mode 100644 index 0000000000000..07cc59722e91f --- /dev/null +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/connection/ConnectionServiceFactory.java @@ -0,0 +1,19 @@ +package com.linkedin.gms.factory.connection; + +import com.linkedin.entity.client.SystemEntityClient; +import com.linkedin.metadata.connection.ConnectionService; +import javax.annotation.Nonnull; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class ConnectionServiceFactory { + @Bean(name = "connectionService") + @Nonnull + protected ConnectionService getInstance( + @Qualifier("systemEntityClient") final SystemEntityClient systemEntityClient) + throws Exception { + return new ConnectionService(systemEntityClient); + } +} diff --git 
a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java index 2f92f0ad5bf9f..fc35e6d045d0c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/JavaEntityClientFactory.java @@ -16,6 +16,7 @@ import com.linkedin.metadata.timeseries.TimeseriesAspectService; import javax.inject.Singleton; import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @@ -37,7 +38,8 @@ public EntityClient entityClient( final @Qualifier("timeseriesAspectService") TimeseriesAspectService _timeseriesAspectService, final @Qualifier("relationshipSearchService") LineageSearchService _lineageSearchService, final @Qualifier("kafkaEventProducer") EventProducer _eventProducer, - final RollbackService rollbackService) { + final RollbackService rollbackService, + final @Value("${entityClient.restli.get.batchSize:375}") int batchGetV2Size) { return new JavaEntityClient( _entityService, _deleteEntityService, @@ -47,7 +49,8 @@ public EntityClient entityClient( _lineageSearchService, _timeseriesAspectService, rollbackService, - _eventProducer); + _eventProducer, + batchGetV2Size); } @Bean("systemEntityClient") @@ -63,7 +66,8 @@ public SystemEntityClient systemEntityClient( final @Qualifier("relationshipSearchService") LineageSearchService _lineageSearchService, final @Qualifier("kafkaEventProducer") EventProducer _eventProducer, final RollbackService rollbackService, - final EntityClientCacheConfig entityClientCacheConfig) { + final 
EntityClientCacheConfig entityClientCacheConfig, + final @Value("${entityClient.restli.get.batchSize:375}") int batchGetV2Size) { return new SystemJavaEntityClient( _entityService, _deleteEntityService, @@ -74,6 +78,7 @@ public SystemEntityClient systemEntityClient( _timeseriesAspectService, rollbackService, _eventProducer, - entityClientCacheConfig); + entityClientCacheConfig, + batchGetV2Size); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java index 9da7fc706d08a..2d9f570e1b07d 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityclient/RestliEntityClientFactory.java @@ -29,7 +29,8 @@ public EntityClient entityClient( @Value("${datahub.gms.uri}") String gmsUri, @Value("${datahub.gms.sslContext.protocol}") String gmsSslProtocol, @Value("${entityClient.retryInterval:2}") int retryInterval, - @Value("${entityClient.numRetries:3}") int numRetries) { + @Value("${entityClient.numRetries:3}") int numRetries, + final @Value("${entityClient.restli.get.batchSize:150}") int batchGetV2Size) { final Client restClient; if (gmsUri != null) { restClient = DefaultRestliClientFactory.getRestLiClient(URI.create(gmsUri), gmsSslProtocol); @@ -37,7 +38,8 @@ public EntityClient entityClient( restClient = DefaultRestliClientFactory.getRestLiClient(gmsHost, gmsPort, gmsUseSSL, gmsSslProtocol); } - return new RestliEntityClient(restClient, new ExponentialBackoff(retryInterval), numRetries); + return new RestliEntityClient( + restClient, new ExponentialBackoff(retryInterval), numRetries, batchGetV2Size); } @Bean("systemEntityClient") @@ -50,7 +52,8 @@ public SystemEntityClient systemEntityClient( @Value("${datahub.gms.sslContext.protocol}") String 
gmsSslProtocol, @Value("${entityClient.retryInterval:2}") int retryInterval, @Value("${entityClient.numRetries:3}") int numRetries, - final EntityClientCacheConfig entityClientCacheConfig) { + final EntityClientCacheConfig entityClientCacheConfig, + final @Value("${entityClient.restli.get.batchSize:150}") int batchGetV2Size) { final Client restClient; if (gmsUri != null) { @@ -60,6 +63,10 @@ public SystemEntityClient systemEntityClient( DefaultRestliClientFactory.getRestLiClient(gmsHost, gmsPort, gmsUseSSL, gmsSslProtocol); } return new SystemRestliEntityClient( - restClient, new ExponentialBackoff(retryInterval), numRetries, entityClientCacheConfig); + restClient, + new ExponentialBackoff(retryInterval), + numRetries, + entityClientCacheConfig, + batchGetV2Size); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java index f78dcf1b80156..f1518f9c8f9d7 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/ConfigEntityRegistryFactory.java @@ -1,10 +1,17 @@ package com.linkedin.gms.factory.entityregistry; +import com.datahub.plugins.metadata.aspect.SpringPluginFactory; +import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistryException; import java.io.IOException; +import java.util.List; +import java.util.function.BiFunction; import javax.annotation.Nonnull; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; +import 
org.springframework.context.ApplicationContext; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.core.io.Resource; @@ -12,6 +19,8 @@ @Configuration public class ConfigEntityRegistryFactory { + @Autowired private ApplicationContext applicationContext; + @Value("${configEntityRegistry.path}") private String entityRegistryConfigPath; @@ -21,10 +30,13 @@ public class ConfigEntityRegistryFactory { @Bean(name = "configEntityRegistry") @Nonnull protected ConfigEntityRegistry getInstance() throws IOException, EntityRegistryException { + BiFunction, PluginFactory> pluginFactoryProvider = + (config, loaders) -> new SpringPluginFactory(applicationContext, config, loaders); if (entityRegistryConfigPath != null) { - return new ConfigEntityRegistry(entityRegistryConfigPath); + return new ConfigEntityRegistry(entityRegistryConfigPath, pluginFactoryProvider); } else { - return new ConfigEntityRegistry(entityRegistryResource.getInputStream()); + return new ConfigEntityRegistry( + entityRegistryResource.getInputStream(), pluginFactoryProvider); } } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java index addc08246f1ae..0e9664cefdb7a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entityregistry/PluginEntityRegistryFactory.java @@ -1,10 +1,15 @@ package com.linkedin.gms.factory.entityregistry; +import com.datahub.plugins.metadata.aspect.SpringPluginFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; import 
com.linkedin.metadata.config.EntityRegistryPluginConfiguration; import com.linkedin.metadata.models.registry.PluginEntityRegistryLoader; import java.io.FileNotFoundException; import java.net.MalformedURLException; +import java.util.List; +import java.util.function.BiFunction; import javax.annotation.Nonnull; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; @@ -18,11 +23,16 @@ public class PluginEntityRegistryFactory { @Bean(name = "pluginEntityRegistry") @Nonnull - protected PluginEntityRegistryLoader getInstance(ConfigurationProvider configurationProvider) + protected PluginEntityRegistryLoader getInstance( + @Nonnull final ConfigurationProvider configurationProvider) throws FileNotFoundException, MalformedURLException { EntityRegistryPluginConfiguration pluginConfiguration = configurationProvider.getDatahub().getPlugin().getEntityRegistry(); + BiFunction, PluginFactory> pluginFactoryProvider = + (config, loaders) -> new SpringPluginFactory(null, config, loaders); return new PluginEntityRegistryLoader( - pluginConfiguration.getPath(), pluginConfiguration.getLoadDelaySeconds()); + pluginConfiguration.getPath(), + pluginConfiguration.getLoadDelaySeconds(), + pluginFactoryProvider); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java index 678d442396d0f..1ac6010be92e5 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/graphql/GraphQLEngineFactory.java @@ -21,6 +21,7 @@ import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.recommendation.RecommendationServiceFactory; import com.linkedin.metadata.client.UsageStatsJavaClient; +import 
com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.GraphService; @@ -181,6 +182,10 @@ public class GraphQLEngineFactory { @Qualifier("businessAttributeService") private BusinessAttributeService businessAttributeService; + @Autowired + @Qualifier("connectionService") + private ConnectionService _connectionService; + @Bean(name = "graphQLEngine") @Nonnull protected GraphQLEngine graphQLEngine( @@ -233,6 +238,7 @@ protected GraphQLEngine graphQLEngine( configProvider.getGraphQL().getQuery().getComplexityLimit()); args.setGraphQLQueryDepthLimit(configProvider.getGraphQL().getQuery().getDepthLimit()); args.setBusinessAttributeService(businessAttributeService); + args.setConnectionService(_connectionService); return new GmsGraphQLEngine(args).builder().build(); } } diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java index 31e4b58a56e74..e53e59b863c9c 100644 --- a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/GraphQLController.java @@ -28,6 +28,7 @@ import java.util.concurrent.TimeUnit; import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; import org.springframework.http.HttpEntity; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; @@ -54,6 +55,8 @@ public GraphQLController() { @Named("systemOperationContext") private OperationContext systemOperationContext; + private static final int MAX_LOG_WIDTH = 512; + @PostMapping(value = "/graphql", produces = "application/json;charset=utf-8") CompletableFuture> postGraphQL(HttpEntity httpEntity) { @@ -70,7 +73,7 @@ 
CompletableFuture> postGraphQL(HttpEntity httpEnt try { bodyJson = mapper.readTree(jsonStr); } catch (JsonProcessingException e) { - log.error(String.format("Failed to parse json %s", jsonStr)); + log.error("Failed to parse json {}", jsonStr); return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } @@ -85,6 +88,7 @@ CompletableFuture> postGraphQL(HttpEntity httpEnt if (queryJson == null) { return CompletableFuture.completedFuture(new ResponseEntity<>(HttpStatus.BAD_REQUEST)); } + final String query = queryJson.asText(); /* * Extract "operationName" field @@ -105,8 +109,6 @@ CompletableFuture> postGraphQL(HttpEntity httpEnt .convertValue(variablesJson, new TypeReference>() {}) : Collections.emptyMap(); - log.debug(String.format("Executing graphQL query: %s, variables: %s", queryJson, variables)); - /* * Init QueryContext */ @@ -114,50 +116,52 @@ CompletableFuture> postGraphQL(HttpEntity httpEnt SpringQueryContext context = new SpringQueryContext( - true, - authentication, - _authorizerChain, - systemOperationContext, - queryJson.asText(), - variables); + true, authentication, _authorizerChain, systemOperationContext, query, variables); Span.current().setAttribute("actor.urn", context.getActorUrn()); + // operationName is an optional field only required if multiple operations are present + final String queryName = operationName != null ? 
operationName : context.getQueryName(); + final String threadName = Thread.currentThread().getName(); + log.info("Processing request, operation: {}, actor urn: {}", queryName, context.getActorUrn()); + log.debug("Query: {}, variables: {}", query, variables); + return CompletableFuture.supplyAsync( () -> { + log.info("Executing operation {} for {}", queryName, threadName); + /* * Execute GraphQL Query */ ExecutionResult executionResult = - _engine.execute(queryJson.asText(), operationName, variables, context); + _engine.execute(query, operationName, variables, context); if (executionResult.getErrors().size() != 0) { // There were GraphQL errors. Report in error logs. log.error( - String.format( - "Errors while executing graphQL query: %s, result: %s, errors: %s", - queryJson, executionResult.toSpecification(), executionResult.getErrors())); - } else { - log.debug( - String.format( - "Executed graphQL query: %s, result: %s", - queryJson, executionResult.toSpecification())); + "Errors while executing query: {}, result: {}, errors: {}", + StringUtils.abbreviate(query, MAX_LOG_WIDTH), + executionResult.toSpecification(), + executionResult.getErrors()); } /* * Format & Return Response */ try { - submitMetrics(executionResult); + long totalDuration = submitMetrics(executionResult); + String executionTook = totalDuration > 0 ? 
" in " + totalDuration + " ms" : ""; + log.info("Executed operation {}" + executionTook, queryName); // Remove tracing from response to reduce bulk, not used by the frontend executionResult.getExtensions().remove("tracing"); String responseBodyStr = new ObjectMapper().writeValueAsString(executionResult.toSpecification()); + log.info("Operation {} execution result size: {}", queryName, responseBodyStr.length()); + log.trace("Execution result: {}", responseBodyStr); return new ResponseEntity<>(responseBodyStr, HttpStatus.OK); } catch (IllegalArgumentException | JsonProcessingException e) { log.error( - String.format( - "Failed to convert execution result %s into a JsonNode", - executionResult.toSpecification())); + "Failed to convert execution result {} into a JsonNode", + executionResult.toSpecification()); return new ResponseEntity<>(HttpStatus.SERVICE_UNAVAILABLE); } }); @@ -197,7 +201,7 @@ private void observeErrors(ExecutionResult executionResult) { } @SuppressWarnings("unchecked") - private void submitMetrics(ExecutionResult executionResult) { + private long submitMetrics(ExecutionResult executionResult) { try { observeErrors(executionResult); MetricUtils.get().counter(MetricRegistry.name(this.getClass(), "call")).inc(); @@ -220,6 +224,7 @@ private void submitMetrics(ExecutionResult executionResult) { MetricUtils.get() .histogram(MetricRegistry.name(this.getClass(), fieldName)) .update(totalDuration); + return totalDuration; } } catch (Exception e) { MetricUtils.get() @@ -227,5 +232,7 @@ private void submitMetrics(ExecutionResult executionResult) { .inc(); log.error("Unable to submit metrics for GraphQL call.", e); } + + return -1; } } diff --git a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java index b815563818e78..591e1158879d4 100644 --- 
a/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java +++ b/metadata-service/graphql-servlet-impl/src/main/java/com/datahub/graphql/SpringQueryContext.java @@ -17,6 +17,7 @@ public class SpringQueryContext implements QueryContext { private final boolean isAuthenticated; private final Authentication authentication; private final Authorizer authorizer; + @Getter private final String queryName; @Nonnull private final OperationContext operationContext; public SpringQueryContext( @@ -30,7 +31,7 @@ public SpringQueryContext( this.authentication = authentication; this.authorizer = authorizer; - String queryName = + this.queryName = new Parser() .parseDocument(jsonQuery).getDefinitions().stream() .filter(def -> def instanceof OperationDefinition) diff --git a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java index 48481e8e5b576..3fba99a4b197f 100644 --- a/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java +++ b/metadata-service/openapi-entity-servlet/src/test/java/io/datahubproject/openapi/config/OpenAPIEntityTestConfiguration.java @@ -90,7 +90,7 @@ public EntityRegistry entityRegistry() throws EntityRegistryException, Interrupt dependency. 
*/ PluginEntityRegistryLoader custom = - new PluginEntityRegistryLoader(getClass().getResource("/custom-model").getFile(), 60); + new PluginEntityRegistryLoader(getClass().getResource("/custom-model").getFile(), 60, null); ConfigEntityRegistry standard = new ConfigEntityRegistry( diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java index a0191acfe5fed..1718beeaeaba3 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/operations/elastic/OperationsController.java @@ -298,11 +298,7 @@ public ResponseEntity> restoreIndices( .gePitEpochMs(gePitEpochMs) .lePitEpochMs(lePitEpochMs); - return ResponseEntity.of( - Optional.of( - entityService - .streamRestoreIndices(opContext, args, log::info) - .collect(Collectors.toList()))); + return ResponseEntity.of(Optional.of(entityService.restoreIndices(opContext, args, log::info))); } @Tag(name = "RestoreIndices") diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java index adc3164c03b11..41cf972079c25 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v2/controller/EntityController.java @@ -27,8 +27,8 @@ import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.aspect.patch.GenericJsonPatch; import com.linkedin.metadata.aspect.patch.template.common.GenericPatchTemplate; +import com.linkedin.metadata.entity.EntityApiUtils; import 
com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.entity.EntityUtils; import com.linkedin.metadata.entity.IngestResult; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -294,7 +294,9 @@ public ResponseEntity getAspect( .flatMap( e -> e.getAspects().entrySet().stream() - .filter(entry -> entry.getKey().equals(aspectName)) + .filter( + entry -> + entry.getKey().equals(lookupAspectSpec(urn, aspectName).getName())) .map(Map.Entry::getValue) .findFirst())); } @@ -324,7 +326,7 @@ public ResponseEntity headAspect( authentication, true); - return exists(opContext, urn, aspectName) + return exists(opContext, urn, lookupAspectSpec(urn, aspectName).getName()) ? ResponseEntity.noContent().build() : ResponseEntity.notFound().build(); } @@ -414,7 +416,8 @@ public void deleteAspect( authentication, true); - entityService.deleteAspect(opContext, entityUrn, aspectName, Map.of(), true); + entityService.deleteAspect( + opContext, entityUrn, lookupAspectSpec(urn, aspectName).getName(), Map.of(), true); } @Tag(name = "Generic Aspects") @@ -450,7 +453,7 @@ public ResponseEntity createAspect( authentication, true); - AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); + AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); ChangeMCP upsert = toUpsertItem( opContext.getRetrieverContext().get().getAspectRetriever(), @@ -521,9 +524,9 @@ public ResponseEntity patchAspect( authentication, true); - RecordTemplate currentValue = entityService.getAspect(opContext, urn, aspectName, 0); + AspectSpec aspectSpec = lookupAspectSpec(entitySpec, aspectName); + RecordTemplate currentValue = entityService.getAspect(opContext, urn, aspectSpec.getName(), 0); - AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName); GenericPatchTemplate genericPatchTemplate = GenericPatchTemplate.builder() .genericJsonPatch(patch) @@ -560,7 +563,7 @@ public ResponseEntity patchAspect( .build( 
objectMapper, Map.of( - aspectName, + aspectSpec.getName(), Pair.of( result.getNewValue(), withSystemMetadata ? result.getNewSystemMetadata() : null))))); @@ -598,7 +601,11 @@ private List toRecordTemplates( Map> aspects = entityService.getLatestEnvelopedAspects( - opContext, urnsSet, resolveAspectNames(urnsSet, aspectNames)); + opContext, + urnsSet, + resolveAspectNames(urnsSet, aspectNames).stream() + .map(AspectSpec::getName) + .collect(Collectors.toSet())); return urns.stream() .map( @@ -612,18 +619,21 @@ private List toRecordTemplates( } } - private Set resolveAspectNames(Set urns, Set requestedNames) { - if (requestedNames.isEmpty()) { + private Set resolveAspectNames(Set urns, Set requestedAspectNames) { + if (requestedAspectNames.isEmpty()) { return urns.stream() .flatMap(u -> entityRegistry.getEntitySpec(u.getEntityType()).getAspectSpecs().stream()) - .map(AspectSpec::getName) .collect(Collectors.toSet()); } else { // ensure key is always present return Stream.concat( - requestedNames.stream(), urns.stream() - .map(u -> entityRegistry.getEntitySpec(u.getEntityType()).getKeyAspectName())) + .flatMap( + urn -> + requestedAspectNames.stream() + .map(aspectName -> lookupAspectSpec(urn, aspectName))), + urns.stream() + .map(u -> entityRegistry.getEntitySpec(u.getEntityType()).getKeyAspectSpec())) .collect(Collectors.toSet()); } } @@ -642,7 +652,7 @@ private Map> toAspectMap( } private AspectSpec lookupAspectSpec(Urn urn, String aspectName) { - return entityRegistry.getEntitySpec(urn.getEntityType()).getAspectSpec(aspectName); + return lookupAspectSpec(entityRegistry.getEntitySpec(urn.getEntityType()), aspectName); } private RecordTemplate toRecordTemplate(AspectSpec aspectSpec, EnvelopedAspect envelopedAspect) { @@ -677,8 +687,7 @@ private ChangeMCP toUpsertItem( @Nonnull AspectSpec aspectSpec, @Nullable RecordTemplate currentValue, @Nonnull GenericPatchTemplate genericPatchTemplate, - @Nonnull Actor actor) - throws URISyntaxException { + @Nonnull Actor actor) 
{ return ChangeItemImpl.fromPatch( urn, aspectSpec, @@ -722,7 +731,7 @@ private AspectsBatch toMCPBatch( if (aspect.getValue().has("systemMetadata")) { builder.systemMetadata( - EntityUtils.parseSystemMetadata( + EntityApiUtils.parseSystemMetadata( objectMapper.writeValueAsString(aspect.getValue().get("systemMetadata")))); } @@ -762,4 +771,18 @@ public List toEntityListResponse( } return responseList; } + + /** + * Returns the spec registered under aspectName, else the first spec whose name matches ignoring case. + * + * @return the matching aspect spec; Optional.get() throws NoSuchElementException when none matches + */ + private static AspectSpec lookupAspectSpec(EntitySpec entitySpec, String aspectName) { + return entitySpec.getAspectSpec(aspectName) != null + ? entitySpec.getAspectSpec(aspectName) + : entitySpec.getAspectSpecs().stream() + .filter(aspec -> aspec.getName().equalsIgnoreCase(aspectName)) + .findFirst() + .get(); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java index 796406ed746c9..df3f6445a855a 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/OpenAPIV3Generator.java @@ -20,8 +20,11 @@ import io.swagger.v3.oas.models.responses.ApiResponse; import io.swagger.v3.oas.models.responses.ApiResponses; import java.math.BigDecimal; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -72,7 +75,8 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { final Components components = new Components(); // --> Aspect components // TODO: Correct handling of SystemMetadata and SortOrder - components.addSchemas("SystemMetadata", new Schema().type(TYPE_STRING)); + components.addSchemas( + "SystemMetadata", new 
Schema().type(TYPE_OBJECT).additionalProperties(true)); components.addSchemas("SortOrder", new Schema()._enum(List.of("ASCENDING", "DESCENDING"))); components.addSchemas("AspectPatch", buildAspectPatchSchema()); entityRegistry @@ -84,10 +88,10 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { addAspectSchemas(components, a); components.addSchemas( upperAspectName + ASPECT_REQUEST_SUFFIX, - buildAspectRefSchema(upperAspectName, false)); + buildAspectRefRequestSchema(upperAspectName)); components.addSchemas( upperAspectName + ASPECT_RESPONSE_SUFFIX, - buildAspectRefSchema(upperAspectName, true)); + buildAspectRefResponseSchema(upperAspectName)); }); // --> Entity components entityRegistry.getEntitySpecs().values().stream() @@ -182,7 +186,6 @@ private static PathItem buildSingleEntityPath(final EntitySpec entity) { final Operation getOperation = new Operation() .summary(String.format("Get %s.", upperFirst)) - .operationId(String.format("get%s", upperFirst)) .parameters(parameters) .tags(List.of(entity.getName() + " Entity")) .responses(new ApiResponses().addApiResponse("200", successApiResponse)); @@ -199,7 +202,6 @@ private static PathItem buildSingleEntityPath(final EntitySpec entity) { final Operation headOperation = new Operation() .summary(String.format("%s existence.", upperFirst)) - .operationId(String.format("head%s", upperFirst)) .parameters( List.of( new Parameter() @@ -220,7 +222,6 @@ private static PathItem buildSingleEntityPath(final EntitySpec entity) { final Operation deleteOperation = new Operation() .summary(String.format("Delete entity %s", upperFirst)) - .operationId(String.format("delete%s", upperFirst)) .parameters( List.of( new Parameter() @@ -271,7 +272,6 @@ private static PathItem buildListEntityPath(final EntitySpec entity) { result.setGet( new Operation() .summary(String.format("Scroll/List %s.", upperFirst)) - .operationId("scroll") .parameters(parameters) .tags(List.of(entity.getName() + " Entity")) 
.responses(new ApiResponses().addApiResponse("200", successApiResponse))); @@ -318,7 +318,7 @@ private static PathItem buildListEntityPath(final EntitySpec entity) { .parameters( List.of( new Parameter() - .in(NAME_ASYNC) + .in(NAME_QUERY) .name("async") .description("Use async ingestion for high throughput.") .schema(new Schema().type(TYPE_BOOLEAN)._default(true)), @@ -328,7 +328,6 @@ private static PathItem buildListEntityPath(final EntitySpec entity) { .description("Include systemMetadata with response.") .schema(new Schema().type(TYPE_BOOLEAN)._default(false)))) .summary("Create " + upperFirst + " entities.") - .operationId("createEntities") .tags(List.of(entity.getName() + " Entity")) .requestBody( new RequestBody() @@ -362,7 +361,7 @@ private static void addExtraParameters(final Components components) { .schema( new Schema() .type(TYPE_ARRAY) - ._default(PROPERTY_URN) + ._default(List.of(PROPERTY_URN)) .items( new Schema<>() .type(TYPE_STRING) @@ -383,7 +382,7 @@ private static void addExtraParameters(final Components components) { .in(NAME_QUERY) .name("count") .description("Number of items per page.") - .example("10") + .example(10) .schema(new Schema().type(TYPE_INTEGER)._default(10).minimum(new BigDecimal(1)))); components.addParameters( "ScrollQuery" + MODEL_VERSION, @@ -409,7 +408,11 @@ private static Parameter buildParameterSchema( final Schema schema = new Schema() .type(TYPE_ARRAY) - .items(new Schema().type(TYPE_STRING)._enum(aspectNames)._default(aspectNames)); + .items( + new Schema() + .type(TYPE_STRING) + ._enum(aspectNames) + ._default(aspectNames.stream().findFirst().orElse(null))); return new Parameter() .in(NAME_QUERY) .name("aspects") @@ -434,6 +437,27 @@ private static void addAspectSchemas(final Components components, final AspectSp final String newDefinition = definition.replaceAll("definitions", "components/schemas"); Schema s = Json.mapper().readValue(newDefinition, Schema.class); + Set requiredNames = + 
Optional.ofNullable(s.getRequired()) + .map(names -> Set.copyOf(names)) + .orElse(new HashSet()); + Map properties = + Optional.ofNullable(s.getProperties()).orElse(new HashMap<>()); + properties.forEach( + (name, schema) -> { + String $ref = schema.get$ref(); + boolean isNameRequired = requiredNames.contains(name); + if ($ref != null && !isNameRequired) { + // A non-required $ref property must be wrapped in a { allOf: [ $ref ] } + // object to allow the + // property to be marked as nullable + schema.setType(TYPE_OBJECT); + schema.set$ref(null); + schema.setAllOf(List.of(new Schema().$ref($ref))); + } + schema.setNullable(!isNameRequired); + }); + components.addSchemas(n, s); } catch (Exception e) { throw new RuntimeException(e); @@ -444,24 +468,27 @@ private static void addAspectSchemas(final Components components, final AspectSp } } - private static Schema buildAspectRefSchema( - final String aspectName, final boolean withSystemMetadata) { + private static Schema buildAspectRefResponseSchema(final String aspectName) { final Schema result = new Schema<>() .type(TYPE_OBJECT) .description(ASPECT_DESCRIPTION) .required(List.of(PROPERTY_VALUE)) .addProperty(PROPERTY_VALUE, new Schema<>().$ref(PATH_DEFINITIONS + aspectName)); - if (withSystemMetadata) { - result.addProperty( - "systemMetadata", - new Schema<>() - .$ref(PATH_DEFINITIONS + "SystemMetadata") - .description("System metadata for the aspect.")); - } + result.addProperty( + "systemMetadata", + new Schema<>() + .type(TYPE_OBJECT) + .allOf(List.of(new Schema().$ref(PATH_DEFINITIONS + "SystemMetadata"))) + .description("System metadata for the aspect.") + .nullable(true)); return result; } + private static Schema buildAspectRefRequestSchema(final String aspectName) { + return new Schema<>().$ref(PATH_DEFINITIONS + aspectName); + } + private static Schema buildEntitySchema( final EntitySpec entity, Set aspectNames, final boolean withSystemMetadata) { final Map properties = @@ -575,7 +602,6 @@ private static 
PathItem buildSingleEntityAspectPath( final Operation getOperation = new Operation() .summary(String.format("Get %s for %s.", aspect, entity.getName())) - .operationId(String.format("get%s", upperFirstAspect)) .tags(tags) .parameters(List.of(getParameter)) .responses(new ApiResponses().addApiResponse("200", successApiResponse)); @@ -591,7 +617,6 @@ private static PathItem buildSingleEntityAspectPath( final Operation headOperation = new Operation() .summary(String.format("%s on %s existence.", aspect, upperFirstEntity)) - .operationId(String.format("head%s", upperFirstAspect)) .tags(tags) .responses( new ApiResponses() @@ -605,7 +630,6 @@ private static PathItem buildSingleEntityAspectPath( final Operation deleteOperation = new Operation() .summary(String.format("Delete %s on entity %s", aspect, upperFirstEntity)) - .operationId(String.format("delete%s", upperFirstAspect)) .tags(tags) .responses(new ApiResponses().addApiResponse("200", successDeleteResponse)); // Post Operation @@ -641,7 +665,6 @@ private static PathItem buildSingleEntityAspectPath( final Operation postOperation = new Operation() .summary(String.format("Create aspect %s on %s ", aspect, upperFirstEntity)) - .operationId(String.format("create%s", upperFirstAspect)) .tags(tags) .requestBody(requestBody) .responses(new ApiResponses().addApiResponse("201", successPostResponse)); @@ -680,7 +703,6 @@ private static PathItem buildSingleEntityAspectPath( .description("Include systemMetadata with response.") .schema(new Schema().type(TYPE_BOOLEAN)._default(false)))) .summary(String.format("Patch aspect %s on %s ", aspect, upperFirstEntity)) - .operationId(String.format("patch%s", upperFirstAspect)) .tags(tags) .requestBody(patchRequestBody) .responses(new ApiResponses().addApiResponse("200", successPatchResponse)); diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java 
b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java index c05252fe9c09f..0ce62f5cb10f6 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java @@ -1,14 +1,20 @@ package io.datahubproject.openapi.v3; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; import static org.testng.Assert.assertTrue; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import io.swagger.v3.core.util.Yaml; import io.swagger.v3.oas.models.OpenAPI; +import io.swagger.v3.oas.models.media.Schema; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import java.util.List; +import java.util.Map; import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; @@ -40,5 +46,42 @@ public void testOpenApiSpecBuilder() throws Exception { assertTrue(openAPI.getComponents().getSchemas().size() > 900); assertTrue(openAPI.getComponents().getParameters().size() > 50); assertTrue(openAPI.getPaths().size() > 500); + + Schema datasetPropertiesSchema = openAPI.getComponents().getSchemas().get("DatasetProperties"); + List requiredNames = datasetPropertiesSchema.getRequired(); + Map properties = datasetPropertiesSchema.getProperties(); + + // Assert required properties are non-nullable + Schema customProperties = properties.get("customProperties"); + assertTrue(requiredNames.contains("customProperties")); + assertFalse(customProperties.getNullable()); + + // Assert non-required properties are nullable + Schema name = properties.get("name"); + assertFalse(requiredNames.contains("name")); + assertTrue(name.getNullable()); + + // Assert non-required $ref 
properties are replaced by nullable { allOf: [ $ref ] } objects + Schema created = properties.get("created"); + assertFalse(requiredNames.contains("created")); + assertEquals("object", created.getType()); + assertNull(created.get$ref()); + assertEquals(List.of(new Schema().$ref("#/components/schemas/TimeStamp")), created.getAllOf()); + assertTrue(created.getNullable()); + + // Assert systemMetadata property on response schema is optional. + Map datasetPropertiesResponseSchemaProps = + openAPI + .getComponents() + .getSchemas() + .get("DatasetPropertiesAspectResponse_v3") + .getProperties(); + Schema systemMetadata = datasetPropertiesResponseSchemaProps.get("systemMetadata"); + assertEquals("object", systemMetadata.getType()); + assertNull(systemMetadata.get$ref()); + assertEquals( + List.of(new Schema().$ref("#/components/schemas/SystemMetadata")), + systemMetadata.getAllOf()); + assertTrue(systemMetadata.getNullable()); } } diff --git a/metadata-service/plugin/build.gradle b/metadata-service/plugin/build.gradle index 3f91b8f6ae6ba..f519eba4921d2 100644 --- a/metadata-service/plugin/build.gradle +++ b/metadata-service/plugin/build.gradle @@ -6,6 +6,8 @@ dependencies { implementation 'org.apache.commons:commons-lang3:3.12.0' implementation project(path: ':metadata-auth:auth-api') + implementation project(':entity-registry') + implementation externalDependency.springContext implementation externalDependency.guava implementation externalDependency.jacksonDataBind implementation externalDependency.jacksonDataFormatYaml diff --git a/metadata-service/plugin/src/main/java/com/datahub/plugins/metadata/aspect/SpringPluginFactory.java b/metadata-service/plugin/src/main/java/com/datahub/plugins/metadata/aspect/SpringPluginFactory.java new file mode 100644 index 0000000000000..8a080c8d9076e --- /dev/null +++ b/metadata-service/plugin/src/main/java/com/datahub/plugins/metadata/aspect/SpringPluginFactory.java @@ -0,0 +1,132 @@ +package com.datahub.plugins.metadata.aspect; + 
+import com.linkedin.metadata.aspect.plugins.PluginFactory; +import com.linkedin.metadata.aspect.plugins.PluginSpec; +import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; +import com.linkedin.metadata.aspect.plugins.config.PluginConfiguration; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.extern.slf4j.Slf4j; +import org.springframework.context.ApplicationContext; +import org.springframework.context.annotation.AnnotationConfigApplicationContext; + +@Slf4j +public class SpringPluginFactory extends PluginFactory { + + @Nullable private final ApplicationContext springApplicationContext; + + public SpringPluginFactory( + @Nullable ApplicationContext springApplicationContext, + @Nullable PluginConfiguration pluginConfiguration, + @Nonnull List classLoaders) { + super(pluginConfiguration, classLoaders); + + String[] packageScan = + extractPackageScan( + Optional.ofNullable(pluginConfiguration) + .map(PluginConfiguration::streamAll) + .orElse(Stream.of())) + .toArray(String[]::new); + + if (springApplicationContext != null || packageScan.length == 0) { + this.springApplicationContext = springApplicationContext; + } else { + AnnotationConfigApplicationContext rootContext = null; + // Stays null when classLoaders is empty, so the refresh below must be null-safe. + for (ClassLoader classLoader : classLoaders) { + AnnotationConfigApplicationContext applicationContext = + new AnnotationConfigApplicationContext(); + applicationContext.setId("custom-plugin"); + if (rootContext != null) { + applicationContext.setParent(rootContext); + } + applicationContext.setClassLoader(classLoader); + applicationContext.scan(packageScan); + rootContext = applicationContext; + } + Optional.ofNullable(rootContext).ifPresent(AnnotationConfigApplicationContext::refresh); + this.springApplicationContext = rootContext; + } + + loadPlugins(); + } + + private static Stream 
extractPackageScan(Stream configStream) { + return filterSpringConfigs(configStream) + .map(AspectPluginConfig::getPackageScan) + .filter(Objects::nonNull) + .flatMap(Collection::stream) + .distinct(); + } + + private static Stream filterSpringConfigs( + Stream configStream) { + return configStream.filter( + config -> config.getSpring() != null && config.getSpring().isEnabled()); + } + + /** + * Builds the non-Spring plugins via the superclass, then adds Spring-managed plugins. + * + * @param baseClazz plugin base class, passed through to the superclass build + * @param configs plugin configs; Spring-enabled ones are resolved as beans + * @param packageNames package names, passed through to the superclass build + * @return superclass plugins followed by each enabled Spring bean plugin + * @param <T> the plugin type + */ + @Override + protected List build( + Class baseClazz, List packageNames, List configs) { + + // load non-Spring plugins through the superclass build + List result = new ArrayList<>(super.build(baseClazz, packageNames, configs)); + + if (springApplicationContext == null) { + return result; + } + + // resolve Spring-enabled plugins as beans from the application context + for (AspectPluginConfig config : + filterSpringConfigs(configs.stream()).collect(Collectors.toSet())) { + boolean loaded = false; + + for (ClassLoader classLoader : getClassLoaders()) { + try { + Class clazz = classLoader.loadClass(config.getClassName()); + + final T plugin; + if (config.getSpring().getName() == null) { + plugin = (T) springApplicationContext.getBean(clazz); + } else { + plugin = (T) springApplicationContext.getBean(config.getSpring().getName(), clazz); + } + + if (plugin.enabled()) { + result.add((T) plugin.setConfig(config)); + } + + loaded = true; + break; + } catch (ClassNotFoundException e) { + log.warn( + "Failed to load class {} from loader {}", + config.getClassName(), + classLoader.getName()); + } + } + + if (!loaded) { + log.error("Failed to load Spring plugin {}!", config.getClassName()); + } + } + + return result; + } +} diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index becdcdd0215fd..00b434d30356f 100644 --- 
a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -834,7 +834,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -845,7 +845,8 @@ "QA" : "Designates quality assurance fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", - "UAT" : "Designates user acceptance testing fabrics" + "UAT" : "Designates user acceptance testing fabrics", + "RVW" : "Designates review fabrics" } }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 96b9b570b2bf1..ffbcdd1b2adb3 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -834,7 +834,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -845,7 +845,8 @@ "QA" : "Designates quality assurance fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", - "UAT" : "Designates user acceptance testing fabrics" + "UAT" : "Designates user acceptance testing fabrics", + 
"RVW" : "Designates review fabrics" } }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 6100073f1fbc9..0139072b2ae15 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -576,7 +576,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -587,7 +587,8 @@ "QA" : "Designates quality assurance fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", - "UAT" : "Designates user acceptance testing fabrics" + "UAT" : "Designates user acceptance testing fabrics", + "RVW" : "Designates review fabrics" } }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index 0573a342da420..1caeed2570317 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -576,7 +576,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], "symbolDocs" : { "CORP" : "Designates corporation 
fabrics", "DEV" : "Designates development fabrics", @@ -587,7 +587,8 @@ "QA" : "Designates quality assurance fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", - "UAT" : "Designates user acceptance testing fabrics" + "UAT" : "Designates user acceptance testing fabrics", + "RVW" : "Designates review fabrics" } }, { "type" : "record", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index 4a1f24d527b89..1592333988b4c 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -834,7 +834,7 @@ "name" : "FabricType", "namespace" : "com.linkedin.common", "doc" : "Fabric group type", - "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP" ], + "symbols" : [ "DEV", "TEST", "QA", "UAT", "EI", "PRE", "STG", "NON_PROD", "PROD", "CORP", "RVW" ], "symbolDocs" : { "CORP" : "Designates corporation fabrics", "DEV" : "Designates development fabrics", @@ -845,7 +845,8 @@ "QA" : "Designates quality assurance fabrics", "STG" : "Designates staging fabrics", "TEST" : "Designates testing fabrics", - "UAT" : "Designates user acceptance testing fabrics" + "UAT" : "Designates user acceptance testing fabrics", + "RVW" : "Designates review fabrics" } }, { "type" : "record", diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index 21246407f2029..70fae208ad77a 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -3,6 +3,7 @@ 
import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterators; import com.linkedin.common.VersionedUrn; import com.linkedin.common.client.BaseClient; import com.linkedin.common.urn.Urn; @@ -108,11 +109,15 @@ public class RestliEntityClient extends BaseClient implements EntityClient { new PlatformRequestBuilders(); private static final RunsRequestBuilders RUNS_REQUEST_BUILDERS = new RunsRequestBuilders(); + private final int batchGetV2Size; + public RestliEntityClient( @Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, - int retryCount) { + int retryCount, + int batchGetV2Size) { super(restliClient, backoffPolicy, retryCount); + this.batchGetV2Size = Math.max(1, batchGetV2Size); } @Override @@ -195,10 +200,10 @@ public Map batchGet( /** * Batch get a set of aspects for multiple entities. * + * @param opContext operation's context * @param entityName the entity type to fetch * @param urns the urns of the entities to batch get * @param aspectNames the aspect names to batch get - * @param authentication the authentication to include in the request to the Metadata Service * @throws RemoteInvocationException when unable to execute request */ @Override @@ -210,29 +215,43 @@ public Map batchGetV2( @Nullable final Set aspectNames) throws RemoteInvocationException, URISyntaxException { - final EntitiesV2BatchGetRequestBuilder requestBuilder = - ENTITIES_V2_REQUEST_BUILDERS - .batchGet() - .aspectsParam(aspectNames) - .ids(urns.stream().map(Urn::toString).collect(Collectors.toList())); - - return sendClientRequest(requestBuilder, opContext.getSessionAuthentication()) - .getEntity() - .getResults() - .entrySet() - .stream() - .collect( - Collectors.toMap( - entry -> { - try { - return Urn.createFromString(entry.getKey()); - } catch (URISyntaxException e) { - throw new RuntimeException( - String.format( - "Failed to bind urn 
string with value %s into urn", entry.getKey())); - } - }, - entry -> entry.getValue().getEntity())); + Map responseMap = new HashMap<>(); + + Iterators.partition(urns.iterator(), batchGetV2Size) + .forEachRemaining( + batch -> { + try { + final EntitiesV2BatchGetRequestBuilder requestBuilder = + ENTITIES_V2_REQUEST_BUILDERS + .batchGet() + .aspectsParam(aspectNames) + .ids(batch.stream().map(Urn::toString).collect(Collectors.toList())); + + responseMap.putAll( + sendClientRequest(requestBuilder, opContext.getSessionAuthentication()) + .getEntity() + .getResults() + .entrySet() + .stream() + .collect( + Collectors.toMap( + entry -> { + try { + return Urn.createFromString(entry.getKey()); + } catch (URISyntaxException e) { + throw new RuntimeException( + String.format( + "Failed to bind urn string with value %s into urn", + entry.getKey())); + } + }, + entry -> entry.getValue().getEntity()))); + } catch (RemoteInvocationException e) { + throw new RuntimeException(e); + } + }); + + return responseMap; } /** @@ -250,31 +269,44 @@ public Map batchGetVersionedV2( @Nonnull OperationContext opContext, @Nonnull String entityName, @Nonnull final Set versionedUrns, - @Nullable final Set aspectNames) - throws RemoteInvocationException, URISyntaxException { - - final EntitiesVersionedV2BatchGetRequestBuilder requestBuilder = - ENTITIES_VERSIONED_V2_REQUEST_BUILDERS - .batchGet() - .aspectsParam(aspectNames) - .entityTypeParam(entityName) - .ids( - versionedUrns.stream() - .map( - versionedUrn -> - com.linkedin.common.urn.VersionedUrn.of( - versionedUrn.getUrn().toString(), versionedUrn.getVersionStamp())) - .collect(Collectors.toSet())); - - return sendClientRequest(requestBuilder, opContext.getSessionAuthentication()) - .getEntity() - .getResults() - .entrySet() - .stream() - .collect( - Collectors.toMap( - entry -> UrnUtils.getUrn(entry.getKey().getUrn()), - entry -> entry.getValue().getEntity())); + @Nullable final Set aspectNames) { + + Map responseMap = new HashMap<>(); + 
+ Iterators.partition(versionedUrns.iterator(), batchGetV2Size) + .forEachRemaining( + batch -> { + final EntitiesVersionedV2BatchGetRequestBuilder requestBuilder = + ENTITIES_VERSIONED_V2_REQUEST_BUILDERS + .batchGet() + .aspectsParam(aspectNames) + .entityTypeParam(entityName) + .ids( + batch.stream() + .map( + versionedUrn -> + com.linkedin.common.urn.VersionedUrn.of( + versionedUrn.getUrn().toString(), + versionedUrn.getVersionStamp())) + .collect(Collectors.toSet())); + + try { + responseMap.putAll( + sendClientRequest(requestBuilder, opContext.getSessionAuthentication()) + .getEntity() + .getResults() + .entrySet() + .stream() + .collect( + Collectors.toMap( + entry -> UrnUtils.getUrn(entry.getKey().getUrn()), + entry -> entry.getValue().getEntity()))); + } catch (RemoteInvocationException e) { + throw new RuntimeException(e); + } + }); + + return responseMap; } /** diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java index 92c20c750c257..364ee9b0519d2 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/SystemRestliEntityClient.java @@ -26,8 +26,9 @@ public SystemRestliEntityClient( @Nonnull final Client restliClient, @Nonnull final BackoffPolicy backoffPolicy, int retryCount, - EntityClientCacheConfig cacheConfig) { - super(restliClient, backoffPolicy, retryCount); + EntityClientCacheConfig cacheConfig, + int batchGetV2Size) { + super(restliClient, backoffPolicy, retryCount, batchGetV2Size); this.operationContextMap = CacheBuilder.newBuilder().maximumSize(500).build(); this.entityClientCache = buildEntityClientCache(SystemRestliEntityClient.class, cacheConfig); } diff --git 
a/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java b/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java index 1f8342170a2ff..474bb24f9e16b 100644 --- a/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java +++ b/metadata-service/restli-client/src/test/java/com/linkedin/common/client/BaseClientTest.java @@ -37,7 +37,7 @@ public void testZeroRetry() throws RemoteInvocationException { when(mockRestliClient.sendRequest(any(ActionRequest.class))).thenReturn(mockFuture); RestliEntityClient testClient = - new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 0); + new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 0, 10); testClient.sendClientRequest(testRequestBuilder, AUTH); // Expected 1 actual try and 0 retries verify(mockRestliClient).sendRequest(any(ActionRequest.class)); @@ -56,7 +56,7 @@ public void testMultipleRetries() throws RemoteInvocationException { .thenReturn(mockFuture); RestliEntityClient testClient = - new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1); + new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1, 10); testClient.sendClientRequest(testRequestBuilder, AUTH); // Expected 1 actual try and 1 retries verify(mockRestliClient, times(2)).sendRequest(any(ActionRequest.class)); @@ -73,7 +73,7 @@ public void testNonRetry() { .thenThrow(new RuntimeException(new RequiredFieldNotPresentException("value"))); RestliEntityClient testClient = - new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1); + new RestliEntityClient(mockRestliClient, new ExponentialBackoff(1), 1, 10); assertThrows( RuntimeException.class, () -> testClient.sendClientRequest(testRequestBuilder, AUTH)); } diff --git a/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java 
b/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java index e44acf06386c5..75614ca998f6a 100644 --- a/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java +++ b/metadata-service/restli-client/src/test/java/com/linkedin/entity/client/SystemRestliEntityClientTest.java @@ -45,7 +45,7 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { noCacheConfig.setEnabled(true); SystemRestliEntityClient noCacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig); + new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig, 1); com.linkedin.entity.EntityResponse responseStatusTrue = buildStatusResponse(true); com.linkedin.entity.EntityResponse responseStatusFalse = buildStatusResponse(false); @@ -83,7 +83,7 @@ public void testCache() throws RemoteInvocationException, URISyntaxException { Map.of(TEST_URN.getEntityType(), Map.of(Constants.STATUS_ASPECT_NAME, 60))); SystemRestliEntityClient cacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, cacheConfig); + new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, cacheConfig, 1); mockResponse(mockRestliClient, responseStatusTrue); assertEquals( @@ -117,7 +117,7 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio noCacheConfig.setEnabled(true); SystemRestliEntityClient noCacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig); + new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, noCacheConfig, 1); com.linkedin.entity.EntityResponse responseStatusTrue = buildStatusResponse(true); com.linkedin.entity.EntityResponse responseStatusFalse = buildStatusResponse(false); @@ -155,7 +155,7 @@ public void testBatchCache() throws RemoteInvocationException, URISyntaxExceptio 
Map.of(TEST_URN.getEntityType(), Map.of(Constants.STATUS_ASPECT_NAME, 60))); SystemRestliEntityClient cacheTest = - new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, cacheConfig); + new SystemRestliEntityClient(mockRestliClient, new ConstantBackoff(0), 0, cacheConfig, 1); mockResponse(mockRestliClient, responseStatusTrue); assertEquals( diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 17a67bb70ff08..4116b8ad30b94 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -8,6 +8,7 @@ import static com.linkedin.metadata.authorization.ApiOperation.DELETE; import static com.linkedin.metadata.authorization.ApiOperation.EXISTS; import static com.linkedin.metadata.authorization.ApiOperation.READ; +import static com.linkedin.metadata.entity.validation.ValidationApiUtils.validateOrThrow; import static com.linkedin.metadata.entity.validation.ValidationUtils.*; import static com.linkedin.metadata.resources.restli.RestliConstants.*; import static com.linkedin.metadata.search.utils.SearchUtils.*; diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java index 78db69a91df5f..2c411f9ad960e 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/Utils.java @@ -74,7 +74,8 @@ public static String restoreIndices( Map result = new HashMap<>(); result.put("args", args); 
result.put("result", entityService - .streamRestoreIndices(opContext, args, log::info) + .restoreIndices(opContext, args, log::info) + .stream() .map(RestoreIndicesResult::toString) .collect(Collectors.joining("\n"))); return result.toString(); diff --git a/metadata-service/services/build.gradle b/metadata-service/services/build.gradle index 9ec523bfd1e21..ea1ff32cb3838 100644 --- a/metadata-service/services/build.gradle +++ b/metadata-service/services/build.gradle @@ -8,7 +8,7 @@ configurations { } dependencies { - implementation externalDependency.jsonPatch + implementation 'com.github.java-json-tools:json-patch:1.13' // TODO: Replace with jakarta.json implementation project(':entity-registry') implementation project(':metadata-utils') implementation project(':metadata-events:mxe-avro') diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java index 5250f06bddae0..0794ba72ff692 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -29,7 +29,6 @@ import java.util.Set; import java.util.concurrent.Future; import java.util.function.Consumer; -import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -298,7 +297,7 @@ Integer getCountAspect( @Nonnull OperationContext opContext, @Nonnull String aspectName, @Nullable String urnLike); // TODO: Extract this to a different service, doesn't need to be here - Stream streamRestoreIndices( + List restoreIndices( @Nonnull OperationContext opContext, @Nonnull RestoreIndicesArgs args, @Nonnull Consumer logger); diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java 
b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java index b4da40871cdd4..89e69174c1502 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java @@ -9,7 +9,7 @@ public class RestoreIndicesArgs implements Cloneable { public static final int DEFAULT_BATCH_SIZE = 500; public static final int DEFAULT_NUM_THREADS = 1; - public static final int DEFAULT_BATCH_DELAY_MS = 1; + public static final int DEFAULT_BATCH_DELAY_MS = 1000; public static final long DEFAULT_GE_PIT_EPOCH_MS = 0; public int start = 0; diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java index 2c4ea4a634c76..3f9022b634c67 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java @@ -36,8 +36,7 @@ protected Map getTagsAspects( @Nonnull OperationContext opContext, @Nonnull Set entityUrns, @Nonnull GlobalTags defaultValue) { - - if (entityUrns.size() <= 0) { + if (entityUrns.isEmpty()) { return Collections.emptyMap(); } @@ -75,8 +74,7 @@ protected Map getEditableSchemaMetadataAspects( @Nonnull OperationContext opContext, @Nonnull Set entityUrns, @Nonnull EditableSchemaMetadata defaultValue) { - - if (entityUrns.size() <= 0) { + if (entityUrns.isEmpty()) { return Collections.emptyMap(); } @@ -114,8 +112,7 @@ protected Map getOwnershipAspects( @Nonnull OperationContext opContext, @Nonnull Set entityUrns, @Nonnull Ownership defaultValue) { - - if (entityUrns.size() <= 0) { + if (entityUrns.isEmpty()) { return Collections.emptyMap(); } @@ -153,8 +150,7 @@ protected Map getGlossaryTermsAspects( @Nonnull 
OperationContext opContext, @Nonnull Set entityUrns, @Nonnull GlossaryTerms defaultValue) { - - if (entityUrns.size() <= 0) { + if (entityUrns.isEmpty()) { return Collections.emptyMap(); } @@ -192,8 +188,7 @@ protected Map getDomainsAspects( @Nonnull OperationContext opContext, @Nonnull Set entityUrns, @Nonnull Domains defaultValue) { - - if (entityUrns.size() <= 0) { + if (entityUrns.isEmpty()) { return Collections.emptyMap(); } diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java index 5d956c2c8ffad..c44cb4eaa1ac3 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/CommonApplicationConfig.java @@ -14,6 +14,8 @@ @ComponentScan( basePackages = { "com.linkedin.metadata.boot", + "com.linkedin.metadata.service", + "com.datahub.event", "com.linkedin.gms.factory.config", "com.linkedin.gms.factory.entityregistry", "com.linkedin.gms.factory.common", @@ -34,7 +36,7 @@ "com.linkedin.gms.factory.auth", "com.linkedin.gms.factory.search", "com.linkedin.gms.factory.secret", - "com.linkedin.gms.factory.timeseries" + "com.linkedin.gms.factory.timeseries", }) @PropertySource(value = "classpath:/application.yaml", factory = YamlPropertySourceFactory.class) @Configuration diff --git a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/GraphQLServletConfig.java b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/GraphQLServletConfig.java index 64ec11f58c60d..42413df0757e6 100644 --- a/metadata-service/war/src/main/java/com/linkedin/gms/servlet/GraphQLServletConfig.java +++ b/metadata-service/war/src/main/java/com/linkedin/gms/servlet/GraphQLServletConfig.java @@ -18,7 +18,8 @@ "com.linkedin.gms.factory.query", "com.linkedin.gms.factory.ermodelrelation", "com.linkedin.gms.factory.dataproduct", - "com.linkedin.gms.factory.businessattribute" + 
"com.linkedin.gms.factory.businessattribute", + "com.linkedin.gms.factory.connection" }) @Configuration public class GraphQLServletConfig {} diff --git a/metadata-service/war/src/main/resources/logback.xml b/metadata-service/war/src/main/resources/logback.xml index ba15f3045d1a2..ebd161023d13c 100644 --- a/metadata-service/war/src/main/resources/logback.xml +++ b/metadata-service/war/src/main/resources/logback.xml @@ -58,6 +58,31 @@ + + ${LOG_DIR}/gms.graphql.log + + ${LOG_DIR}/gms.graphql.%d{yyyy-dd-MM}-%i.log + + 100MB + + 2GB + + 1 + + + ${logging.appender.graphql_debug_file.level:-DEBUG} + ACCEPT + DENY + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + + @@ -75,6 +100,13 @@ + + + + + + + diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 6788f6e87fc0d..ea8f52925b5b3 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -142,6 +142,10 @@ public class PoliciesConfig { "Manage Business Attribute", "Create, update, delete Business Attribute"); + public static final Privilege MANAGE_CONNECTIONS_PRIVILEGE = + Privilege.of( + "MANAGE_CONNECTIONS", "Manage Connections", "Manage connections to external platforms."); + public static final List PLATFORM_PRIVILEGES = ImmutableList.of( MANAGE_POLICIES_PRIVILEGE, @@ -164,7 +168,8 @@ public class PoliciesConfig { MANAGE_GLOBAL_VIEWS, MANAGE_GLOBAL_OWNERSHIP_TYPES, CREATE_BUSINESS_ATTRIBUTE_PRIVILEGE, - MANAGE_BUSINESS_ATTRIBUTE_PRIVILEGE); + MANAGE_BUSINESS_ATTRIBUTE_PRIVILEGE, + MANAGE_CONNECTIONS_PRIVILEGE); // Resource Privileges // @@ -653,6 +658,19 @@ public class PoliciesConfig { CREATE_ENTITY_PRIVILEGE, EXISTS_ENTITY_PRIVILEGE)); + // Properties Privileges + public static final ResourcePrivileges 
STRUCTURED_PROPERTIES_PRIVILEGES = + ResourcePrivileges.of( + "structuredProperty", + "Structured Properties", + "Structured Properties", + ImmutableList.of( + CREATE_ENTITY_PRIVILEGE, + VIEW_ENTITY_PAGE_PRIVILEGE, + EXISTS_ENTITY_PRIVILEGE, + EDIT_ENTITY_PRIVILEGE, + DELETE_ENTITY_PRIVILEGE)); + // ERModelRelationship Privileges public static final ResourcePrivileges ER_MODEL_RELATIONSHIP_PRIVILEGES = ResourcePrivileges.of( @@ -689,7 +707,8 @@ public class PoliciesConfig { NOTEBOOK_PRIVILEGES, DATA_PRODUCT_PRIVILEGES, ER_MODEL_RELATIONSHIP_PRIVILEGES, - BUSINESS_ATTRIBUTE_PRIVILEGES); + BUSINESS_ATTRIBUTE_PRIVILEGES, + STRUCTURED_PROPERTIES_PRIVILEGES); // Merge all entity specific resource privileges to create a superset of all resource privileges public static final ResourcePrivileges ALL_RESOURCE_PRIVILEGES = diff --git a/settings.gradle b/settings.gradle index f553bf97ec14b..a09e9a650803f 100644 --- a/settings.gradle +++ b/settings.gradle @@ -43,6 +43,7 @@ include 'metadata-testing:metadata-models-test-utils' include 'metadata-testing:metadata-test-utils' include 'entity-registry' include 'metadata-io' +include 'metadata-io:metadata-io-api' include 'datahub-upgrade' include 'metadata-utils' include 'li-utils' @@ -53,6 +54,7 @@ include 'metadata-models-custom' include 'entity-registry:custom-test-model' include 'metadata-integration:java:spark-lineage' include 'metadata-integration:java:datahub-client' +include 'metadata-integration:java:custom-plugin-lib' include 'metadata-integration:java:datahub-event' include 'metadata-integration:java:datahub-protobuf' include 'metadata-integration:java:openlineage-converter' diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index 7e9c50f121548..9800cf65fc452 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -44,6 +44,11 @@ task yarnInstall(type: YarnTask) { environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] args = ['install', '--cwd', "${project.rootDir}/smoke-test/tests/cypress"] 
} +task cypressLint(type: YarnTask, dependsOn: yarnInstall) { + environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] + // TODO: Run a full lint instead of just format. + args = ['--cwd', "${project.rootDir}/smoke-test/tests/cypress", 'run', 'format'] +} task installDev(type: Exec) { inputs.file file('pyproject.toml') @@ -58,7 +63,7 @@ task installDev(type: Exec) { "touch ${venv_name}/.build_install_dev_sentinel" } -task lint(type: Exec, dependsOn: installDev) { +task pythonLint(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "black --check --diff tests/ && " + @@ -66,7 +71,7 @@ task lint(type: Exec, dependsOn: installDev) { "ruff --statistics tests/ && " + "mypy tests/" } -task lintFix(type: Exec, dependsOn: installDev) { +task pythonLintFix(type: Exec, dependsOn: installDev) { commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + "black tests/ && " + @@ -135,6 +140,21 @@ task cypressDev(type: Exec, dependsOn: [installDev, ':metadata-ingestion:install environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' + workingDir = project.projectDir + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "./cypress-dev.sh" +} + +/** + * The following will install Cypress data in an already running stack. 
+ */ +task cypressData(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { + environment 'RUN_QUICKSTART', 'false' + environment 'DATAHUB_KAFKA_SCHEMA_REGISTRY_URL', 'http://localhost:8080/schema-registry/api/' + environment 'KAFKA_BROKER_CONTAINER', 'datahub-kafka-broker-1' + environment 'RUN_UI', 'false' + workingDir = project.projectDir commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + diff --git a/smoke-test/cypress-dev.sh b/smoke-test/cypress-dev.sh index 090189af3f1a7..2b31c574d0578 100755 --- a/smoke-test/cypress-dev.sh +++ b/smoke-test/cypress-dev.sh @@ -19,5 +19,7 @@ yarn install source "$DIR/set-cypress-creds.sh" -npx cypress open \ - --env "ADMIN_DISPLAYNAME=$CYPRESS_ADMIN_DISPLAYNAME,ADMIN_USERNAME=$CYPRESS_ADMIN_USERNAME,ADMIN_PASSWORD=$CYPRESS_ADMIN_PASSWORD" +if [ "${RUN_UI:-true}" == "true" ]; then + npx cypress open \ + --env "ADMIN_DISPLAYNAME=$CYPRESS_ADMIN_DISPLAYNAME,ADMIN_USERNAME=$CYPRESS_ADMIN_USERNAME,ADMIN_PASSWORD=$CYPRESS_ADMIN_PASSWORD" +fi diff --git a/smoke-test/requirements.txt b/smoke-test/requirements.txt index c5d43163dff5d..861c69f354fe5 100644 --- a/smoke-test/requirements.txt +++ b/smoke-test/requirements.txt @@ -16,3 +16,5 @@ ruff==0.0.287 # stub version are copied from metadata-ingestion/setup.py and that should be the source of truth types-requests>=2.28.11.6,<=2.31.0.3 types-PyYAML +# https://github.com/docker/docker-py/issues/3256 +requests<=2.31.0 diff --git a/smoke-test/tests/cypress/.eslintrc.js b/smoke-test/tests/cypress/.eslintrc.js new file mode 100644 index 0000000000000..2dfa99ac5a374 --- /dev/null +++ b/smoke-test/tests/cypress/.eslintrc.js @@ -0,0 +1,31 @@ +module.exports = { + env: { + es2021: true, + node: true, + }, + plugins: ["cypress"], + extends: ["airbnb-base", "plugin:cypress/recommended", "prettier"], + overrides: [ + { + env: { + node: true, + }, + files: [".eslintrc.{js,cjs}"], + parserOptions: { + sourceType: "script", + }, + }, + ], + parserOptions: { 
+ ecmaVersion: "latest", + sourceType: "module", + }, + rules: { + camelcase: "off", + "import/prefer-default-export": "off", + // TODO: These should be upgraded to warnings and fixed. + "cypress/no-unnecessary-waiting": "off", + "cypress/unsafe-to-chain-command": "off", + "no-unused-vars": "off", + }, +}; diff --git a/smoke-test/tests/cypress/cypress.config.js b/smoke-test/tests/cypress/cypress.config.js index 3eb65825378b9..7c3863ad869e3 100644 --- a/smoke-test/tests/cypress/cypress.config.js +++ b/smoke-test/tests/cypress/cypress.config.js @@ -1,10 +1,11 @@ -const { defineConfig } = require('cypress') +// eslint-disable-next-line global-require +const { defineConfig } = require("cypress"); module.exports = defineConfig({ chromeWebSecurity: false, viewportHeight: 960, viewportWidth: 1536, - projectId: 'hkrxk5', + projectId: "hkrxk5", defaultCommandTimeout: 10000, retries: { runMode: 2, @@ -15,10 +16,11 @@ module.exports = defineConfig({ // We've imported your old cypress plugins here. // You may want to clean this up later by importing these. 
setupNodeEvents(on, config) { - return require('./cypress/plugins/index.js')(on, config) + // eslint-disable-next-line global-require + return require("./cypress/plugins/index")(on, config); }, - baseUrl: 'http://localhost:9002/', - specPattern: 'cypress/e2e/**/*.{js,jsx,ts,tsx}', + baseUrl: "http://localhost:9002/", + specPattern: "cypress/e2e/**/*.{js,jsx,ts,tsx}", experimentalStudio: true, }, -}) +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/analytics/analytics.js b/smoke-test/tests/cypress/cypress/e2e/analytics/analytics.js index 0e5105717d2be..6b5452ff07d88 100644 --- a/smoke-test/tests/cypress/cypress/e2e/analytics/analytics.js +++ b/smoke-test/tests/cypress/cypress/e2e/analytics/analytics.js @@ -1,5 +1,5 @@ -describe('analytics', () => { - it('can go to a chart and see analytics in tab views', () => { +describe("analytics", () => { + it("can go to a chart and see analytics in tab views", () => { cy.login(); cy.goToChart("urn:li:chart:(looker,cypress_baz1)"); @@ -9,8 +9,8 @@ describe('analytics', () => { cy.goToAnalytics(); cy.contains("Tab Views By Entity Type (Past Week)").scrollIntoView({ - ensureScrollable: false - }) + ensureScrollable: false, + }); cy.waitTextPresent("dashboards"); }); -}) +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/auto_complete/auto_complete.js b/smoke-test/tests/cypress/cypress/e2e/auto_complete/auto_complete.js index adff124ca857c..df7d20eb6f933 100644 --- a/smoke-test/tests/cypress/cypress/e2e/auto_complete/auto_complete.js +++ b/smoke-test/tests/cypress/cypress/e2e/auto_complete/auto_complete.js @@ -29,7 +29,7 @@ describe("auto-complete", () => { cy.get('[data-testid^="auto-complete-option"]').first().click(); cy.url().should( "include", - "dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)" + "dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", ); cy.wait(2000); }); @@ -43,7 +43,7 @@ describe("auto-complete", () => { 
cy.get('[data-testid="quick-filter-DASHBOARD"]').click(); cy.wait(2000); cy.get('[data-testid="auto-complete-entity-name-Baz Chart 2').should( - "not.exist" + "not.exist", ); cy.contains("Baz Dashboard"); cy.wait(1000); @@ -58,7 +58,7 @@ describe("auto-complete", () => { cy.focused().type("{enter}"); cy.url().should( "include", - "?filter_platform___false___EQUAL___0=urn%3Ali%3AdataPlatform%3Abigquery" + "?filter_platform___false___EQUAL___0=urn%3Ali%3AdataPlatform%3Abigquery", ); }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/browse/browseV2.js b/smoke-test/tests/cypress/cypress/e2e/browse/browseV2.js index d951b15d4a592..3a6759ae87afc 100644 --- a/smoke-test/tests/cypress/cypress/e2e/browse/browseV2.js +++ b/smoke-test/tests/cypress/cypress/e2e/browse/browseV2.js @@ -83,7 +83,7 @@ describe("search", () => { cy.url().should("include", "/browse/dataset"); cy.url().should( "not.include", - "search?filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET" + "search?filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET", ); }); @@ -91,7 +91,7 @@ describe("search", () => { setBrowseFeatureFlag(true); cy.login(); cy.visit( - "/dataset/urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)" + "/dataset/urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", ); cy.get('[data-testid="browse-path-cypress_project"]').click({ force: true, @@ -99,15 +99,15 @@ describe("search", () => { cy.url().should("not.include", "/browse/dataset"); cy.url().should( "include", - "filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET" + "filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET", ); cy.url().should( "include", - "filter_platform___false___EQUAL___1=urn%3Ali%3AdataPlatform%3Abigquery" + "filter_platform___false___EQUAL___1=urn%3Ali%3AdataPlatform%3Abigquery", ); cy.url().should( "include", - "filter_browsePathV2___false___EQUAL___2=%E2%90%9Fcypress_project" + 
"filter_browsePathV2___false___EQUAL___2=%E2%90%9Fcypress_project", ); }); @@ -135,7 +135,7 @@ describe("search", () => { cy.url().should("not.include", "/browse/dataset"); cy.url().should( "include", - "search?filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET" + "search?filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET", ); cy.get("[data-testid=browse-platform-BigQuery]"); }); @@ -156,15 +156,15 @@ describe("search", () => { cy.url().should( "include", - "filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET" + "filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET", ); cy.url().should( "include", - "filter_platform___false___EQUAL___1=urn%3Ali%3AdataPlatform%3Abigquery" + "filter_platform___false___EQUAL___1=urn%3Ali%3AdataPlatform%3Abigquery", ); cy.url().should( "include", - "filter_browsePathV2___false___EQUAL___2=%E2%90%9Fcypress_project%E2%90%9Fjaffle_shop" + "filter_browsePathV2___false___EQUAL___2=%E2%90%9Fcypress_project%E2%90%9Fjaffle_shop", ); // close each of the levels, ensuring its children aren't visible anymore @@ -175,7 +175,7 @@ describe("search", () => { cy.get("[data-testid=browse-platform-BigQuery]").click({ force: true }); cy.get("[data-testid=browse-node-expand-cypress_project]").should( - "not.be.visible" + "not.be.visible", ); cy.get("[data-testid=browse-entity-Datasets]").click({ force: true }); @@ -197,30 +197,30 @@ describe("search", () => { cy.url().should( "include", - "filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET" + "filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET", ); cy.url().should( "include", - "filter_platform___false___EQUAL___1=urn%3Ali%3AdataPlatform%3Abigquery" + "filter_platform___false___EQUAL___1=urn%3Ali%3AdataPlatform%3Abigquery", ); cy.url().should( "include", - "filter_browsePathV2___false___EQUAL___2=%E2%90%9Fcypress_project%E2%90%9Fjaffle_shop" + 
"filter_browsePathV2___false___EQUAL___2=%E2%90%9Fcypress_project%E2%90%9Fjaffle_shop", ); cy.get("[data-testid=browse-node-jaffle_shop]").click({ force: true }); cy.url().should( "not.include", - "filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET" + "filter__entityType%E2%90%9EtypeNames___false___EQUAL___0=DATASET", ); cy.url().should( "not.include", - "filter_platform___false___EQUAL___1=urn%3Ali%3AdataPlatform%3Abigquery" + "filter_platform___false___EQUAL___1=urn%3Ali%3AdataPlatform%3Abigquery", ); cy.url().should( "not.include", - "filter_browsePathV2___false___EQUAL___2=%E2%90%9Fcypress_project%E2%90%9Fjaffle_shop" + "filter_browsePathV2___false___EQUAL___2=%E2%90%9Fcypress_project%E2%90%9Fjaffle_shop", ); }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/businessAttribute/attribute_mutations.js b/smoke-test/tests/cypress/cypress/e2e/businessAttribute/attribute_mutations.js index decee024f050b..a0aabe385b742 100644 --- a/smoke-test/tests/cypress/cypress/e2e/businessAttribute/attribute_mutations.js +++ b/smoke-test/tests/cypress/cypress/e2e/businessAttribute/attribute_mutations.js @@ -1,119 +1,127 @@ import { aliasQuery, hasOperationName } from "../utils"; describe("attribute list adding tags and terms", () => { - let businessAttributeEntityEnabled; + let businessAttributeEntityEnabled; - beforeEach(() => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - aliasQuery(req, "appConfig"); - }); - }); - - const setBusinessAttributeFeatureFlag = () => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - if (hasOperationName(req, "appConfig")) { - req.reply((res) => { - businessAttributeEntityEnabled = res.body.data.appConfig.featureFlags.businessAttributeEntityEnabled; - return res; - }); - } - }).as('apiCall'); - }; - - - it("can create and add a tag to business attribute and visit new tag page", () => { - setBusinessAttributeFeatureFlag(); - cy.login(); - cy.visit("/business-attribute"); - cy.wait('@apiCall').then(() => { - if 
(!businessAttributeEntityEnabled) { - return; - } - cy.wait(3000); - cy.waitTextVisible("Business Attribute"); - cy.wait(3000); - - cy.mouseover('[data-testid="schema-field-cypressTestAttribute-tags"]'); - cy.get('[data-testid="schema-field-cypressTestAttribute-tags"]').within(() => - cy.contains("Add Tags").click() - ); - - cy.enterTextInTestId("tag-term-modal-input", "CypressAddTagToAttribute"); - - cy.contains("Create CypressAddTagToAttribute").click({ force: true }); - - cy.get("textarea").type("CypressAddTagToAttribute Test Description"); - - cy.contains(/Create$/).click({ force: true }); - - // wait a breath for elasticsearch to index the tag being applied to the business attribute- if we navigate too quick ES - // wont know and we'll see applied to 0 entities - cy.wait(3000); - - // go to tag drawer - cy.contains("CypressAddTagToAttribute").click({ force: true }); - - cy.wait(3000); - - // Click the Tag Details to launch full profile - cy.contains("Tag Details").click({ force: true }); - - cy.wait(3000); - - // title of tag page - cy.contains("CypressAddTagToAttribute"); - - // description of tag page - cy.contains("CypressAddTagToAttribute Test Description"); - - cy.wait(3000); - // used by panel - click to search - cy.contains("1 Business Attributes").click({ force: true }); - - // verify business attribute shows up in search now - cy.contains("of 1 result").click({ force: true }); - cy.contains("cypressTestAttribute").click({ force: true }); - cy.get('[data-testid="tag-CypressAddTagToAttribute"]').within(() => - cy.get("span[aria-label=close]").click() - ); - cy.contains("Yes").click(); - - cy.contains("CypressAddTagToAttribute").should("not.exist"); - - cy.goToTag("urn:li:tag:CypressAddTagToAttribute", "CypressAddTagToAttribute"); - cy.deleteFromDropdown(); + beforeEach(() => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); + }); + }); + + const setBusinessAttributeFeatureFlag = () => { + cy.intercept("POST", 
"/api/v2/graphql", (req) => { + if (hasOperationName(req, "appConfig")) { + req.reply((res) => { + businessAttributeEntityEnabled = + res.body.data.appConfig.featureFlags.businessAttributeEntityEnabled; + return res; }); + } + }).as("apiCall"); + }; - }); + it("can create and add a tag to business attribute and visit new tag page", () => { + setBusinessAttributeFeatureFlag(); + cy.login(); + cy.visit("/business-attribute"); + cy.wait("@apiCall").then(() => { + if (!businessAttributeEntityEnabled) { + return; + } + cy.wait(3000); + cy.waitTextVisible("Business Attribute"); + cy.wait(3000); + cy.mouseover('[data-testid="schema-field-cypressTestAttribute-tags"]'); + cy.get('[data-testid="schema-field-cypressTestAttribute-tags"]').within( + () => cy.contains("Add Tags").click(), + ); - it("can add and remove terms from a business attribute", () => { - setBusinessAttributeFeatureFlag(); - cy.login(); - cy.visit("/business-attribute/" + "urn:li:businessAttribute:cypressTestAttribute"); - cy.wait('@apiCall').then(() => { - if (!businessAttributeEntityEnabled) { - return; - } - cy.wait(3000); - cy.waitTextVisible("cypressTestAttribute"); - cy.wait(3000); - cy.clickOptionWithText("Add Terms"); - cy.selectOptionInTagTermModal("CypressTerm"); - cy.contains("CypressTerm"); - - cy.goToBusinessAttributeList(); - cy.get('[data-testid="schema-field-cypressTestAttribute-terms"]').contains("CypressTerm"); - - cy.get('[data-testid="schema-field-cypressTestAttribute-terms"]').within(() => - cy - .get("span[aria-label=close]") - .trigger("mouseover", { force: true }) - .click({ force: true }) - ); - cy.contains("Yes").click({ force: true }); - - cy.get('[data-testid="schema-field-cypressTestAttribute-terms"]').contains("CypressTerm").should("not.exist"); - }); + cy.enterTextInTestId("tag-term-modal-input", "CypressAddTagToAttribute"); + + cy.contains("Create CypressAddTagToAttribute").click({ force: true }); + + cy.get("textarea").type("CypressAddTagToAttribute Test Description"); + + 
cy.contains(/Create$/).click({ force: true }); + + // wait a breath for elasticsearch to index the tag being applied to the business attribute- if we navigate too quick ES + // wont know and we'll see applied to 0 entities + cy.wait(3000); + + // go to tag drawer + cy.contains("CypressAddTagToAttribute").click({ force: true }); + + cy.wait(3000); + + // Click the Tag Details to launch full profile + cy.contains("Tag Details").click({ force: true }); + + cy.wait(3000); + + // title of tag page + cy.contains("CypressAddTagToAttribute"); + + // description of tag page + cy.contains("CypressAddTagToAttribute Test Description"); + + cy.wait(3000); + // used by panel - click to search + cy.contains("1 Business Attributes").click({ force: true }); + + // verify business attribute shows up in search now + cy.contains("of 1 result").click({ force: true }); + cy.contains("cypressTestAttribute").click({ force: true }); + cy.get('[data-testid="tag-CypressAddTagToAttribute"]').within(() => + cy.get("span[aria-label=close]").click(), + ); + cy.contains("Yes").click(); + + cy.contains("CypressAddTagToAttribute").should("not.exist"); + + cy.goToTag( + "urn:li:tag:CypressAddTagToAttribute", + "CypressAddTagToAttribute", + ); + cy.deleteFromDropdown(); + }); + }); + + it("can add and remove terms from a business attribute", () => { + setBusinessAttributeFeatureFlag(); + cy.login(); + cy.visit( + "/business-attribute/" + "urn:li:businessAttribute:cypressTestAttribute", + ); + cy.wait("@apiCall").then(() => { + if (!businessAttributeEntityEnabled) { + return; + } + cy.wait(3000); + cy.waitTextVisible("cypressTestAttribute"); + cy.wait(3000); + cy.clickOptionWithText("Add Terms"); + cy.selectOptionInTagTermModal("CypressTerm"); + cy.contains("CypressTerm"); + + cy.goToBusinessAttributeList(); + cy.get( + '[data-testid="schema-field-cypressTestAttribute-terms"]', + ).contains("CypressTerm"); + + cy.get('[data-testid="schema-field-cypressTestAttribute-terms"]').within( + () => + cy + 
.get("span[aria-label=close]") + .trigger("mouseover", { force: true }) + .click({ force: true }), + ); + cy.contains("Yes").click({ force: true }); + + cy.get('[data-testid="schema-field-cypressTestAttribute-terms"]') + .contains("CypressTerm") + .should("not.exist"); }); + }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/businessAttribute/businessAttribute.js b/smoke-test/tests/cypress/cypress/e2e/businessAttribute/businessAttribute.js index 0657dc238a154..a106915463d3d 100644 --- a/smoke-test/tests/cypress/cypress/e2e/businessAttribute/businessAttribute.js +++ b/smoke-test/tests/cypress/cypress/e2e/businessAttribute/businessAttribute.js @@ -1,197 +1,215 @@ import { aliasQuery, hasOperationName } from "../utils"; describe("businessAttribute", () => { - let businessAttributeEntityEnabled; + let businessAttributeEntityEnabled; - beforeEach(() => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - aliasQuery(req, "appConfig"); - }); - }); - - const setBusinessAttributeFeatureFlag = () => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - if (hasOperationName(req, "appConfig")) { - req.reply((res) => { - businessAttributeEntityEnabled = res.body.data.appConfig.featureFlags.businessAttributeEntityEnabled; - return res; - }); - } - }).as('apiCall'); - }; - - it('go to business attribute page, create attribute ', function () { - const urn="urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; - const businessAttribute="CypressBusinessAttribute"; - const datasetName = "cypress_logging_events"; - setBusinessAttributeFeatureFlag(); - cy.login(); - cy.visit("/business-attribute"); - cy.wait('@apiCall').then(() => { - if (!businessAttributeEntityEnabled) { - return; - } - cy.wait(3000); - cy.waitTextVisible("Business Attribute"); - cy.wait(3000); - cy.clickOptionWithText("Create Business Attribute"); - cy.addBusinessAttributeViaModal(businessAttribute, "Create Business Attribute", businessAttribute, "create-business-attribute-button"); - 
- cy.wait(3000); - cy.goToBusinessAttributeList() - - cy.wait(3000) - cy.contains(businessAttribute).should("be.visible"); - - cy.addAttributeToDataset(urn, datasetName, businessAttribute); - - cy.get('[data-testid="schema-field-event_name-businessAttribute"]').within(() => - cy - .get("span[aria-label=close]") - .trigger("mouseover", { force: true }) - .click({ force: true }) - ); - cy.contains("Yes").click({ force: true }); - - cy.get('[data-testid="schema-field-event_name-businessAttribute"]').contains("CypressBusinessAttribute").should("not.exist"); - - cy.goToBusinessAttributeList(); - cy.clickOptionWithText(businessAttribute); - cy.deleteFromDropdown(); - - cy.goToBusinessAttributeList(); - cy.ensureTextNotPresent(businessAttribute); - }); + beforeEach(() => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); }); - - it('Inheriting tags and terms from business attribute to dataset ', function () { - const urn="urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; - const businessAttribute="CypressAttribute"; - const datasetName = "cypress_logging_events"; - const term="CypressTerm"; - const tag="Cypress"; - setBusinessAttributeFeatureFlag(); - cy.login(); - cy.visit("/dataset/" + urn); - cy.wait('@apiCall').then(() => { - if (!businessAttributeEntityEnabled) { - return; - } - cy.wait(5000); - cy.waitTextVisible(datasetName); - cy.clickOptionWithText("event_name"); - cy.contains("Business Attribute"); - cy.get('[data-testid="schema-field-event_name-businessAttribute"]').within(() => - cy.contains("Add Attribute").click() - ); - cy.selectOptionInAttributeModal(businessAttribute); - cy.contains(businessAttribute); - cy.contains(term); - cy.contains(tag); + }); + + const setBusinessAttributeFeatureFlag = () => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + if (hasOperationName(req, "appConfig")) { + req.reply((res) => { + businessAttributeEntityEnabled = + 
res.body.data.appConfig.featureFlags.businessAttributeEntityEnabled; + return res; }); + } + }).as("apiCall"); + }; + + it("go to business attribute page, create attribute ", () => { + const urn = + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; + const businessAttribute = "CypressBusinessAttribute"; + const datasetName = "cypress_logging_events"; + setBusinessAttributeFeatureFlag(); + cy.login(); + cy.visit("/business-attribute"); + cy.wait("@apiCall").then(() => { + if (!businessAttributeEntityEnabled) { + return; + } + cy.wait(3000); + cy.waitTextVisible("Business Attribute"); + cy.wait(3000); + cy.clickOptionWithText("Create Business Attribute"); + cy.addBusinessAttributeViaModal( + businessAttribute, + "Create Business Attribute", + businessAttribute, + "create-business-attribute-button", + ); + + cy.wait(3000); + cy.goToBusinessAttributeList(); + + cy.wait(3000); + cy.contains(businessAttribute).should("be.visible"); + + cy.addAttributeToDataset(urn, datasetName, businessAttribute); + + cy.get( + '[data-testid="schema-field-event_name-businessAttribute"]', + ).within(() => + cy + .get("span[aria-label=close]") + .trigger("mouseover", { force: true }) + .click({ force: true }), + ); + cy.contains("Yes").click({ force: true }); + + cy.get('[data-testid="schema-field-event_name-businessAttribute"]') + .contains("CypressBusinessAttribute") + .should("not.exist"); + + cy.goToBusinessAttributeList(); + cy.clickOptionWithText(businessAttribute); + cy.deleteFromDropdown(); + + cy.goToBusinessAttributeList(); + cy.ensureTextNotPresent(businessAttribute); }); - - it("can visit related entities", () => { - const businessAttribute="CypressAttribute"; - setBusinessAttributeFeatureFlag(); - cy.login(); - cy.visit("/business-attribute"); - cy.wait('@apiCall').then(() => { - if (!businessAttributeEntityEnabled) { - return; - } - cy.wait(3000); - cy.waitTextVisible("Business Attribute"); - cy.wait(3000); - cy.clickOptionWithText(businessAttribute); - 
cy.clickOptionWithText("Related Entities"); - //cy.visit("/business-attribute/urn:li:businessAttribute:37c81832-06e0-40b1-a682-858e1dd0d449/Related%20Entities"); - //cy.wait(5000); - cy.contains("of 0").should("not.exist"); - cy.contains(/of [0-9]+/); - }); + }); + + it("Inheriting tags and terms from business attribute to dataset ", () => { + const urn = + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; + const businessAttribute = "CypressAttribute"; + const datasetName = "cypress_logging_events"; + const term = "CypressTerm"; + const tag = "Cypress"; + setBusinessAttributeFeatureFlag(); + cy.login(); + cy.visit(`/dataset/${urn}`); + cy.wait("@apiCall").then(() => { + if (!businessAttributeEntityEnabled) { + return; + } + cy.wait(5000); + cy.waitTextVisible(datasetName); + cy.clickOptionWithText("event_name"); + cy.contains("Business Attribute"); + cy.get( + '[data-testid="schema-field-event_name-businessAttribute"]', + ).within(() => cy.contains("Add Attribute").click()); + cy.selectOptionInAttributeModal(businessAttribute); + cy.contains(businessAttribute); + cy.contains(term); + cy.contains(tag); }); - - - it("can search related entities by query", () => { - setBusinessAttributeFeatureFlag(); - cy.login(); - cy.visit("/business-attribute/urn:li:businessAttribute:37c81832-06e0-40b1-a682-858e1dd0d449/Related%20Entities"); - cy.wait('@apiCall').then(() => { - if (!businessAttributeEntityEnabled) { - return; - } - cy.get('[placeholder="Filter entities..."]').click().type( - "event_n{enter}" - ); - cy.wait(5000); - cy.contains("of 0").should("not.exist"); - cy.contains(/of 1/); - cy.contains("event_name"); - }); + }); + + it("can visit related entities", () => { + const businessAttribute = "CypressAttribute"; + setBusinessAttributeFeatureFlag(); + cy.login(); + cy.visit("/business-attribute"); + cy.wait("@apiCall").then(() => { + if (!businessAttributeEntityEnabled) { + return; + } + cy.wait(3000); + cy.waitTextVisible("Business Attribute"); 
+ cy.wait(3000); + cy.clickOptionWithText(businessAttribute); + cy.clickOptionWithText("Related Entities"); + // cy.visit("/business-attribute/urn:li:businessAttribute:37c81832-06e0-40b1-a682-858e1dd0d449/Related%20Entities"); + // cy.wait(5000); + cy.contains("of 0").should("not.exist"); + cy.contains(/of [0-9]+/); }); - - it("remove business attribute from dataset", () => { - const urn="urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; - const datasetName = "cypress_logging_events"; - setBusinessAttributeFeatureFlag(); - cy.login(); - cy.visit("/dataset/" + urn); - cy.wait('@apiCall').then(() => { - if (!businessAttributeEntityEnabled) { - return; - } - cy.wait(5000); - cy.waitTextVisible(datasetName); - - cy.wait(3000); - cy.get('body').then(($body) => { - if ($body.find('button[aria-label="Close"]').length > 0) { - cy.get('button[aria-label="Close"]').click(); - } - }); - cy.clickOptionWithText("event_name"); - cy.get('[data-testid="schema-field-event_name-businessAttribute"]').within(() => - cy - .get("span[aria-label=close]") - .trigger("mouseover", { force: true }) - .click({ force: true }) - ); - cy.contains("Yes").click({ force: true }); - - cy.get('[data-testid="schema-field-event_name-businessAttribute"]').contains("CypressAttribute").should("not.exist"); - }); + }); + + it("can search related entities by query", () => { + setBusinessAttributeFeatureFlag(); + cy.login(); + cy.visit( + "/business-attribute/urn:li:businessAttribute:37c81832-06e0-40b1-a682-858e1dd0d449/Related%20Entities", + ); + cy.wait("@apiCall").then(() => { + if (!businessAttributeEntityEnabled) { + return; + } + cy.get('[placeholder="Filter entities..."]') + .click() + .type("event_n{enter}"); + cy.wait(5000); + cy.contains("of 0").should("not.exist"); + cy.contains(/of 1/); + cy.contains("event_name"); }); - - it("update the data type of a business attribute", () => { - const businessAttribute="cypressTestAttribute"; - setBusinessAttributeFeatureFlag(); - 
cy.login(); - cy.visit("/business-attribute"); - cy.wait('@apiCall').then(() => { - if (!businessAttributeEntityEnabled) { - return; - } - cy.wait(3000); - cy.waitTextVisible("Business Attribute"); - cy.wait(3000); - - cy.clickOptionWithText(businessAttribute); - - cy.get('[data-testid="edit-data-type-button"]').within(() => - cy - .get("span[aria-label=edit]") - .trigger("mouseover", { force: true }) - .click({ force: true }) - ); - - cy.get('[data-testid="add-data-type-option"]').get('.ant-select-selection-search-input').click({multiple: true}); - - cy.get('.ant-select-item-option-content') - .contains('STRING') - .click(); - - cy.contains("STRING"); - }); + }); + + it("remove business attribute from dataset", () => { + const urn = + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; + const datasetName = "cypress_logging_events"; + setBusinessAttributeFeatureFlag(); + cy.login(); + cy.visit(`/dataset/${urn}`); + cy.wait("@apiCall").then(() => { + if (!businessAttributeEntityEnabled) { + return; + } + cy.wait(5000); + cy.waitTextVisible(datasetName); + + cy.wait(3000); + cy.get("body").then(($body) => { + if ($body.find('button[aria-label="Close"]').length > 0) { + cy.get('button[aria-label="Close"]').click(); + } + }); + cy.clickOptionWithText("event_name"); + cy.get( + '[data-testid="schema-field-event_name-businessAttribute"]', + ).within(() => + cy + .get("span[aria-label=close]") + .trigger("mouseover", { force: true }) + .click({ force: true }), + ); + cy.contains("Yes").click({ force: true }); + + cy.get('[data-testid="schema-field-event_name-businessAttribute"]') + .contains("CypressAttribute") + .should("not.exist"); + }); + }); + + it("update the data type of a business attribute", () => { + const businessAttribute = "cypressTestAttribute"; + setBusinessAttributeFeatureFlag(); + cy.login(); + cy.visit("/business-attribute"); + cy.wait("@apiCall").then(() => { + if (!businessAttributeEntityEnabled) { + return; + } + cy.wait(3000); + 
cy.waitTextVisible("Business Attribute"); + cy.wait(3000); + + cy.clickOptionWithText(businessAttribute); + + cy.get('[data-testid="edit-data-type-button"]').within(() => + cy + .get("span[aria-label=edit]") + .trigger("mouseover", { force: true }) + .click({ force: true }), + ); + + cy.get('[data-testid="add-data-type-option"]') + .get(".ant-select-selection-search-input") + .click({ multiple: true }); + + cy.get(".ant-select-item-option-content").contains("STRING").click(); + + cy.contains("STRING"); }); + }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js b/smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js index 3152174e17072..9c320eaf77ae4 100644 --- a/smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js +++ b/smoke-test/tests/cypress/cypress/e2e/domains/nested_domains.js @@ -1,225 +1,293 @@ const domainName = "CypressNestedDomain"; -//Delete Unecessary Existing Domains +// Delete Unecessary Existing Domains const deleteExisitingDomain = () => { - cy.get('a[href*="urn:li"] span[class^="ant-typography"]') - .should('be.visible') - .its('length') - .then((length) => { - for (let i = 0; i < length - 1; i++) { - cy.get('a[href*="urn:li"] span[class^="ant-typography"]') - .should('be.visible') - .first() - .click({ force: true }); - deleteFromDomainDropdown(); - } - }); - cy.waitTextVisible('Marketing'); - } + cy.get('a[href*="urn:li"] span[class^="ant-typography"]') + .should("be.visible") + .its("length") + .then((length) => { + for (let i = 0; i < length - 1; i++) { + cy.get('a[href*="urn:li"] span[class^="ant-typography"]') + .should("be.visible") + .first() + .click({ force: true }); + deleteFromDomainDropdown(); + } + }); + cy.waitTextVisible("Marketing"); +}; const createDomain = () => { - cy.get('.anticon-plus').first().click() - cy.waitTextVisible('Create New Domain') - cy.get('[data-testid="create-domain-name"]').click().type(domainName); - cy.clickOptionWithTestId("create-domain-button"); - 
cy.waitTextVisible("Created domain!"); - } + cy.get(".anticon-plus").first().click(); + cy.waitTextVisible("Create New Domain"); + cy.get('[data-testid="create-domain-name"]').click().type(domainName); + cy.clickOptionWithTestId("create-domain-button"); + cy.waitTextVisible("Created domain!"); +}; const moveDomaintoRootLevel = () => { - cy.clickOptionWithText(domainName); - cy.openThreeDotDropdown(); - cy.clickOptionWithTestId("entity-menu-move-button"); - cy.get('[data-testid="move-domain-modal"]').contains("Marketing").click({force: true}); - cy.waitTextVisible('Marketing') - cy.clickOptionWithTestId("move-domain-modal-move-button") - } + cy.clickOptionWithText(domainName); + cy.openThreeDotDropdown(); + cy.clickOptionWithTestId("entity-menu-move-button"); + cy.get('[data-testid="move-domain-modal"]') + .contains("Marketing") + .click({ force: true }); + cy.waitTextVisible("Marketing"); + cy.clickOptionWithTestId("move-domain-modal-move-button"); +}; const moveDomaintoParent = () => { - cy.get('[data-testid="domain-list-item"]').contains("Marketing").prev().click(); - cy.clickOptionWithText(domainName); - cy.waitTextVisible(domainName) - cy.openThreeDotDropdown(); - cy.clickOptionWithTestId("entity-menu-move-button"); - cy.clickOptionWithTestId("move-domain-modal-move-button") - } - -const getDomainList = (domainName) =>{ - cy.contains('span.ant-typography-ellipsis', domainName) - .parent('[data-testid="domain-list-item"]') - .find('[aria-label="right"]') - .click(); - } + cy.get('[data-testid="domain-list-item"]') + .contains("Marketing") + .prev() + .click(); + cy.clickOptionWithText(domainName); + cy.waitTextVisible(domainName); + cy.openThreeDotDropdown(); + cy.clickOptionWithTestId("entity-menu-move-button"); + cy.clickOptionWithTestId("move-domain-modal-move-button"); +}; + +const getDomainList = (domainName) => { + cy.contains("span.ant-typography-ellipsis", domainName) + .parent('[data-testid="domain-list-item"]') + .find('[aria-label="right"]') + 
.click(); +}; const deleteFromDomainDropdown = () => { - cy.clickOptionWithText('Filters') - cy.openThreeDotDropdown(); - cy.clickOptionWithTestId("entity-menu-delete-button"); - cy.waitTextVisible("Are you sure you want to remove this Domain?"); - cy.clickOptionWithText("Yes"); - } + cy.clickOptionWithText("Filters"); + cy.openThreeDotDropdown(); + cy.clickOptionWithTestId("entity-menu-delete-button"); + cy.waitTextVisible("Are you sure you want to remove this Domain?"); + cy.clickOptionWithText("Yes"); +}; const deleteDomain = () => { - cy.clickOptionWithText(domainName).waitTextVisible('Domains'); - deleteFromDomainDropdown() - } - -const verifyEditAndPerformAddAndRemoveActionForDomain = (entity, action, text, body) =>{ - cy.clickOptionWithText(entity) - cy.clickOptionWithText(action) - cy.get('[data-testid="tag-term-modal-input"]').type(text) - cy.get('[data-testid="tag-term-option"]').contains(text).click() - cy.clickOptionWithText(body) - cy.get('[data-testid="add-tag-term-from-modal-btn"]').click() - cy.waitTextVisible(text) - } - -const clearAndType = (text) =>{ - cy.get('[role="textbox"]').click().clear().type(text) - } - -const clearAndDelete = () =>{ - cy.clickOptionWithText("Edit") - cy.get('[role="textbox"]').click().clear() - cy.clickOptionWithTestId("description-editor-save-button") - cy.waitTextVisible('No documentation') - cy.mouseover('.ant-list-item-meta-content') - cy.get('[aria-label="delete"]').click() - cy.waitTextVisible('Link Removed') - } + cy.clickOptionWithText(domainName).waitTextVisible("Domains"); + deleteFromDomainDropdown(); +}; + +const verifyEditAndPerformAddAndRemoveActionForDomain = ( + entity, + action, + text, + body, +) => { + cy.clickOptionWithText(entity); + cy.clickOptionWithText(action); + cy.get('[data-testid="tag-term-modal-input"]').type(text); + cy.get('[data-testid="tag-term-option"]').contains(text).click(); + cy.clickOptionWithText(body); + cy.get('[data-testid="add-tag-term-from-modal-btn"]').click(); + 
cy.waitTextVisible(text); +}; + +const clearAndType = (text) => { + cy.get('[role="textbox"]').click().clear().type(text); +}; + +const clearAndDelete = () => { + cy.clickOptionWithText("Edit"); + cy.get('[role="textbox"]').click().clear(); + cy.clickOptionWithTestId("description-editor-save-button"); + cy.waitTextVisible("No documentation"); + cy.mouseover(".ant-list-item-meta-content"); + cy.get('[aria-label="delete"]').click(); + cy.waitTextVisible("Link Removed"); +}; describe("Verify nested domains test functionalities", () => { - beforeEach (() => { + beforeEach(() => { cy.loginWithCredentials(); cy.goToDomainList(); }); - - it("Verify Create a new domain", () => { - deleteExisitingDomain() - cy.get('a[href*="urn:li"] span[class^="ant-typography"]') - .should('be.visible') - createDomain(); - cy.waitTextVisible("Domains"); - }); - - it ("verify Move domain root level to parent level", () => { - cy.waitTextVisible(domainName) - moveDomaintoRootLevel(); - cy.waitTextVisible("Moved Domain!") - cy.goToDomainList(); - cy.waitTextVisible("1 sub-domain"); - }); - it("Verify Move domain parent level to root level", () => { - moveDomaintoParent(); - cy.waitTextVisible("Moved Domain!") - cy.goToDomainList(); - cy.waitTextVisible(domainName); - }); + it("Verify Create a new domain", () => { + deleteExisitingDomain(); + cy.get('a[href*="urn:li"] span[class^="ant-typography"]').should( + "be.visible", + ); + createDomain(); + cy.waitTextVisible("Domains"); + }); - it("Verify Documentation tab by adding editing Description and adding link", () => { - cy.clickOptionWithText(domainName) - cy.clickOptionWithId('#rc-tabs-0-tab-Documentation') - cy.clickFirstOptionWithText("Add Documentation") - clearAndType("Test added") - cy.clickOptionWithTestId("description-editor-save-button") - cy.waitTextVisible('Description Updated') - cy.waitTextVisible('Test added') - cy.clickFirstOptionWithTestId("add-link-button") - cy.waitTextVisible("Add Link") - 
cy.enterTextInTestId("add-link-modal-url", 'www.test.com') - cy.enterTextInTestId("add-link-modal-label", 'Test Label') - cy.clickOptionWithTestId("add-link-modal-add-button") - cy.waitTextVisible("Test Label") - cy.goToDomainList(); - cy.waitTextVisible("Test added") - cy.clickOptionWithText(domainName) - cy.clickOptionWithText("Documentation") - clearAndDelete() - }) - - it("Verify Right side panel functionalities", () => { - cy.clickOptionWithText(domainName) - cy.waitTextVisible("Filters") - cy.clickOptionWithText("Add Documentation") - clearAndType("Test documentation") - cy.clickOptionWithTestId("description-editor-save-button") - cy.ensureTextNotPresent("Add Documentation") - cy.waitTextVisible('Test documentation') - cy.clickFirstOptionWithSpecificTestId("add-link-button", 1) - cy.waitTextVisible("URL") - cy.enterTextInTestId("add-link-modal-url", 'www.test.com') - cy.enterTextInTestId("add-link-modal-label", 'Test Label') - cy.clickOptionWithTestId("add-link-modal-add-button") - cy.waitTextVisible("Test Label") - cy.clickOptionWithTestId("add-owners-button") - cy.waitTextVisible("Find a user or group") - cy.clickTextOptionWithClass(".rc-virtual-list-holder-inner", Cypress.env('ADMIN_DISPLAYNAME')) - cy.clickOptionWithText("Find a user or group") - cy.clickOptionWithId('#addOwnerButton') - cy.waitTextVisible(Cypress.env('ADMIN_DISPLAYNAME')) - cy.goToDomainList(); - cy.waitTextVisible("Test documentation") - cy.waitTextVisible(Cypress.env('ADMIN_DISPLAYNAME')) - cy.clickOptionWithText(domainName) - cy.clickOptionWithText("Documentation") - clearAndDelete() - }) - - it("Verify Edit Domain Name", () => { - cy.clickFirstOptionWithText(domainName) - cy.clickOptionWithText('Filters') - - //edit name - cy.get('.anticon-edit').eq(0).click().then(() => { - cy.get('.ant-typography-edit-content').type(" Edited").type('{enter}'); - }); - cy.waitTextVisible(domainName + " Edited") - }) - - it("Verify Remove the domain", () => { - deleteDomain(); - cy.goToDomainList(); 
- cy.ensureTextNotPresent(domainName); - }); + it("Verify Documentation tab by adding editing Description and adding link", () => { + cy.clickOptionWithText(domainName); + cy.clickOptionWithId("#rc-tabs-0-tab-Documentation"); + cy.clickFirstOptionWithText("Add Documentation"); + clearAndType("Test added"); + cy.clickOptionWithTestId("description-editor-save-button"); + cy.waitTextVisible("Description Updated"); + cy.waitTextVisible("Test added"); + cy.clickFirstOptionWithTestId("add-link-button"); + cy.waitTextVisible("Add Link"); + cy.enterTextInTestId("add-link-modal-url", "www.test.com"); + cy.enterTextInTestId("add-link-modal-label", "Test Label"); + cy.clickOptionWithTestId("add-link-modal-add-button"); + cy.waitTextVisible("Test Label"); + cy.goToDomainList(); + cy.waitTextVisible("Test added"); + cy.clickOptionWithText(domainName); + cy.clickOptionWithText("Documentation"); + clearAndDelete(); + }); + + it("Verify Right side panel functionalities", () => { + cy.clickOptionWithText(domainName); + cy.waitTextVisible("Filters"); + cy.clickOptionWithText("Add Documentation"); + clearAndType("Test documentation"); + cy.clickOptionWithTestId("description-editor-save-button"); + cy.ensureTextNotPresent("Add Documentation"); + cy.waitTextVisible("Test documentation"); + cy.clickFirstOptionWithSpecificTestId("add-link-button", 1); + cy.waitTextVisible("URL"); + cy.enterTextInTestId("add-link-modal-url", "www.test.com"); + cy.enterTextInTestId("add-link-modal-label", "Test Label"); + cy.clickOptionWithTestId("add-link-modal-add-button"); + cy.waitTextVisible("Test Label"); + cy.clickOptionWithTestId("add-owners-button"); + cy.waitTextVisible("Find a user or group"); + cy.clickTextOptionWithClass( + ".rc-virtual-list-holder-inner", + Cypress.env("ADMIN_DISPLAYNAME"), + ); + cy.clickOptionWithText("Find a user or group"); + cy.clickOptionWithId("#addOwnerButton"); + cy.waitTextVisible(Cypress.env("ADMIN_DISPLAYNAME")); + cy.goToDomainList(); + cy.waitTextVisible("Test 
documentation"); + cy.waitTextVisible(Cypress.env("ADMIN_DISPLAYNAME")); + cy.clickOptionWithText(domainName); + cy.clickOptionWithText("Documentation"); + clearAndDelete(); + }); - it('Verify Add and delete sub domain', () => { - cy.clickFirstOptionWithText('Marketing') - cy.clickOptionWithText('Filters') - createDomain(); - cy.ensureTextNotPresent('Created domain!') - getDomainList('Marketing') - cy.clickOptionWithText(domainName) - deleteFromDomainDropdown() - cy.ensureTextNotPresent(domainName) - }) - - it('Verify entities tab with adding and deleting assets and performing some actions', () => { - cy.clickFirstOptionWithText('Marketing'); - cy.clickOptionWithText('Add assets'); - cy.waitTextVisible("Add assets to Domain"); - cy.enterTextInSpecificTestId("search-bar", 3, 'Baz Chart 1') - cy.clickOptionWithSpecificClass('.ant-checkbox', 1) - cy.clickOptionWithId('#continueButton') - cy.waitTextVisible("Added assets to Domain!") - cy.openThreeDotMenu() - cy.clickOptionWithText("Edit") - cy.clickOptionWithSpecificClass('.ant-checkbox', 1) - verifyEditAndPerformAddAndRemoveActionForDomain('Tags', 'Add tags', 'Cypress', 'Add Tags') - cy.clickOptionWithText('Baz Chart 1') - cy.waitTextVisible("Cypress") - cy.waitTextVisible("Marketing") - cy.go('back') - cy.openThreeDotMenu() - cy.clickOptionWithText("Edit") - cy.clickOptionWithSpecificClass('.ant-checkbox', 1) - verifyEditAndPerformAddAndRemoveActionForDomain('Tags', 'Remove tags', 'Cypress', 'Remove Tags') - cy.clickTextOptionWithClass('.ant-dropdown-trigger', 'Domain') - cy.clickOptionWithText('Unset Domain') - cy.clickOptionWithText("Yes"); - cy.clickOptionWithText('Baz Chart 1') - cy.waitTextVisible('Dashboards') - cy.reload() - cy.ensureTextNotPresent("Cypress") - cy.ensureTextNotPresent("Marketing") - }) + it("Verify Move domain parent level to root level", () => { + moveDomaintoParent(); + cy.waitTextVisible("Moved Domain!"); + cy.goToDomainList(); + cy.waitTextVisible(domainName); + }); + + it("Verify 
Documentation tab by adding editing Description and adding link", () => { + cy.clickOptionWithText(domainName); + cy.clickOptionWithId("#rc-tabs-0-tab-Documentation"); + cy.clickFirstOptionWithText("Add Documentation"); + clearAndType("Test added"); + cy.clickOptionWithTestId("description-editor-save-button"); + cy.waitTextVisible("Description Updated"); + cy.waitTextVisible("Test added"); + cy.clickFirstOptionWithTestId("add-link-button"); + cy.waitTextVisible("Add Link"); + cy.enterTextInTestId("add-link-modal-url", "www.test.com"); + cy.enterTextInTestId("add-link-modal-label", "Test Label"); + cy.clickOptionWithTestId("add-link-modal-add-button"); + cy.waitTextVisible("Test Label"); + cy.goToDomainList(); + cy.waitTextVisible("Test added"); + cy.clickOptionWithText(domainName); + cy.clickOptionWithText("Documentation"); + clearAndDelete(); + }); + + it("Verify Right side panel functionalities", () => { + cy.clickOptionWithText(domainName); + cy.waitTextVisible("Filters"); + cy.clickOptionWithText("Add Documentation"); + clearAndType("Test documentation"); + cy.clickOptionWithTestId("description-editor-save-button"); + cy.ensureTextNotPresent("Add Documentation"); + cy.waitTextVisible("Test documentation"); + cy.clickFirstOptionWithSpecificTestId("add-link-button", 1); + cy.waitTextVisible("URL"); + cy.enterTextInTestId("add-link-modal-url", "www.test.com"); + cy.enterTextInTestId("add-link-modal-label", "Test Label"); + cy.clickOptionWithTestId("add-link-modal-add-button"); + cy.waitTextVisible("Test Label"); + cy.clickOptionWithTestId("add-owners-button"); + cy.waitTextVisible("Find a user or group"); + cy.clickTextOptionWithClass(".rc-virtual-list-holder-inner", "DataHub"); + cy.clickOptionWithText("Find a user or group"); + cy.clickOptionWithId("#addOwnerButton"); + cy.waitTextVisible("DataHub"); + cy.goToDomainList(); + cy.waitTextVisible("Test documentation"); + cy.waitTextVisible("DataHub"); + cy.clickOptionWithText(domainName); + 
cy.clickOptionWithText("Documentation"); + clearAndDelete(); + }); + + it("Verify Edit Domain Name", () => { + cy.clickFirstOptionWithText(domainName); + cy.clickOptionWithText("Filters"); + + // edit name + cy.get(".anticon-edit") + .eq(0) + .click() + .then(() => { + cy.get(".ant-typography-edit-content").type(" Edited").type("{enter}"); + }); + cy.waitTextVisible(`${domainName} Edited`); + }); + + it("Verify Remove the domain", () => { + deleteDomain(); + cy.goToDomainList(); + cy.ensureTextNotPresent(domainName); + }); + + it("Verify Add and delete sub domain", () => { + cy.clickFirstOptionWithText("Marketing"); + cy.clickOptionWithText("Filters"); + createDomain(); + cy.ensureTextNotPresent("Created domain!"); + getDomainList("Marketing"); + cy.clickOptionWithText(domainName); + deleteFromDomainDropdown(); + cy.ensureTextNotPresent(domainName); + }); + + it("Verify entities tab with adding and deleting assets and performing some actions", () => { + cy.clickFirstOptionWithText("Marketing"); + cy.clickOptionWithText("Add assets"); + cy.waitTextVisible("Add assets to Domain"); + cy.enterTextInSpecificTestId("search-bar", 3, "Baz Chart 1"); + cy.clickOptionWithSpecificClass(".ant-checkbox", 1); + cy.clickOptionWithId("#continueButton"); + cy.waitTextVisible("Added assets to Domain!"); + cy.openThreeDotMenu(); + cy.clickOptionWithText("Edit"); + cy.clickOptionWithSpecificClass(".ant-checkbox", 1); + verifyEditAndPerformAddAndRemoveActionForDomain( + "Tags", + "Add tags", + "Cypress", + "Add Tags", + ); + cy.clickOptionWithText("Baz Chart 1"); + cy.waitTextVisible("Cypress"); + cy.waitTextVisible("Marketing"); + cy.go("back"); + cy.openThreeDotMenu(); + cy.clickOptionWithText("Edit"); + cy.clickOptionWithSpecificClass(".ant-checkbox", 1); + verifyEditAndPerformAddAndRemoveActionForDomain( + "Tags", + "Remove tags", + "Cypress", + "Remove Tags", + ); + cy.clickTextOptionWithClass(".ant-dropdown-trigger", "Domain"); + cy.clickOptionWithText("Unset Domain"); + 
cy.clickOptionWithText("Yes"); + cy.clickOptionWithText("Baz Chart 1"); + cy.waitTextVisible("Dashboards"); + cy.reload(); + cy.ensureTextNotPresent("Cypress"); + cy.ensureTextNotPresent("Marketing"); + }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js index b0e24d5346fea..d4746032ac607 100644 --- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js +++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary.js @@ -1,27 +1,38 @@ -const urn = "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; +const urn = + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; const datasetName = "cypress_logging_events"; const glossaryTerm = "CypressGlossaryTerm"; const glossaryTermGroup = "CypressGlossaryGroup"; describe("glossary", () => { - it("go to glossary page, create terms, term group", () => { - cy.loginWithCredentials(); - cy.goToGlossaryList(); - cy.clickOptionWithText("Add Term"); - cy.addViaModal(glossaryTerm, "Create Glossary Term", glossaryTerm, "glossary-entity-modal-create-button"); - cy.clickOptionWithText("Add Term Group"); - cy.addViaModal(glossaryTermGroup, "Create Term Group", glossaryTermGroup, "glossary-entity-modal-create-button"); - cy.addTermToDataset(urn, datasetName, glossaryTerm); - cy.waitTextVisible(glossaryTerm) - cy.goToGlossaryList(); - cy.clickOptionWithText(glossaryTerm); - cy.deleteFromDropdown(); - cy.goToDataset(urn, datasetName); - cy.ensureTextNotPresent(glossaryTerm); - cy.goToGlossaryList(); - cy.clickOptionWithText(glossaryTermGroup); - cy.deleteFromDropdown(); - cy.goToGlossaryList(); - cy.ensureTextNotPresent(glossaryTermGroup); - }); + it("go to glossary page, create terms, term group", () => { + cy.loginWithCredentials(); + cy.goToGlossaryList(); + cy.clickOptionWithText("Add Term"); + cy.addViaModal( + glossaryTerm, + "Create Glossary Term", + glossaryTerm, + "glossary-entity-modal-create-button", 
+ ); + cy.clickOptionWithText("Add Term Group"); + cy.addViaModal( + glossaryTermGroup, + "Create Term Group", + glossaryTermGroup, + "glossary-entity-modal-create-button", + ); + cy.addTermToDataset(urn, datasetName, glossaryTerm); + cy.waitTextVisible(glossaryTerm); + cy.goToGlossaryList(); + cy.clickOptionWithText(glossaryTerm); + cy.deleteFromDropdown(); + cy.goToDataset(urn, datasetName); + cy.ensureTextNotPresent(glossaryTerm); + cy.goToGlossaryList(); + cy.clickOptionWithText(glossaryTermGroup); + cy.deleteFromDropdown(); + cy.goToGlossaryList(); + cy.ensureTextNotPresent(glossaryTermGroup); + }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossaryTerm.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossaryTerm.js index 211a93393cec9..943347a403784 100644 --- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossaryTerm.js +++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossaryTerm.js @@ -1,11 +1,12 @@ const glossaryTerms = { - glossaryTermUrl:"/glossaryTerm/urn:li:glossaryTerm:CypressNode.CypressColumnInfoType/Related%20Entities", - hdfsDataset:"SampleCypressHdfsDataset", - hiveDataset:"cypress_logging_events" + glossaryTermUrl: + "/glossaryTerm/urn:li:glossaryTerm:CypressNode.CypressColumnInfoType/Related%20Entities", + hdfsDataset: "SampleCypressHdfsDataset", + hiveDataset: "cypress_logging_events", }; const applyTagFilter = (tag) => { - cy.get('[aria-label="filter"]').should('be.visible').click() + cy.get('[aria-label="filter"]').should("be.visible").click(); cy.waitTextVisible("Filter"); cy.get(`[data-testid="facet-tags-${tag}"]`).click({ force: true }); }; @@ -13,7 +14,7 @@ const applyTagFilter = (tag) => { const applyAdvancedSearchFilter = (filterType, value) => { cy.get('[aria-label="filter"]').click(); cy.get('[id="search-results-advanced-search"]').click(); - cy.clickOptionWithText('Add Filter'); + cy.clickOptionWithText("Add Filter"); if (filterType === "Tag") { applyTagFilterInSearch(value); @@ -24,17 +25,17 @@ const 
applyAdvancedSearchFilter = (filterType, value) => { const applyBasicSearchFilter = () => { cy.waitTextVisible("Basic"); - cy.clickOptionWithText('Add Filter'); + cy.clickOptionWithText("Add Filter"); }; const searchByConceptsWithLogicalOperator = (concept1, concept2, operator) => { cy.waitTextVisible("Filters"); applyBasicSearchFilter(); applyTagFilterInSearch(concept1); - cy.clickOptionWithText('Add Filter'); + cy.clickOptionWithText("Add Filter"); applyDescriptionFilterInAdvancedSearch(concept2); cy.get('[title="all filters"]').click(); - cy.clickOptionWithText(operator) + cy.clickOptionWithText(operator); }; // Helper function to apply tag filter in basic search @@ -45,7 +46,9 @@ const applyTagFilterInSearch = (tag) => { // Helper function to apply description filter in advanced search const applyDescriptionFilterInAdvancedSearch = (value) => { - cy.get('[data-testid="adv-search-add-filter-description"]').click({ force: true }); + cy.get('[data-testid="adv-search-add-filter-description"]').click({ + force: true, + }); cy.get('[data-testid="edit-text-input"]').type(value); cy.get('[data-testid="edit-text-done-btn"]').click({ force: true }); }; @@ -57,7 +60,10 @@ describe("glossaryTerm", () => { }); it("can search related entities by query", () => { - cy.get('[placeholder="Filter entities..."]').should("be.visible").click().type("logging{enter}"); + cy.get('[placeholder="Filter entities..."]') + .should("be.visible") + .click() + .type("logging{enter}"); cy.waitTextVisible(glossaryTerms.hiveDataset); cy.contains(glossaryTerms.hdfsDataset).should("not.exist"); }); @@ -73,21 +79,21 @@ describe("glossaryTerm", () => { cy.waitTextVisible(glossaryTerms.hdfsDataset); applyAdvancedSearchFilter("Tag", "Cypress2"); cy.waitTextVisible(glossaryTerms.hdfsDataset); - cy.clickOptionWithText(glossaryTerms.hdfsDataset) + cy.clickOptionWithText(glossaryTerms.hdfsDataset); cy.waitTextVisible("Cypress 2"); }); it("can search related entities by AND-ing two concepts using search", 
() => { cy.waitTextVisible(glossaryTerms.hdfsDataset); applyAdvancedSearchFilter(); - cy.clickOptionWithText('Add Filter'); + cy.clickOptionWithText("Add Filter"); cy.get('[data-testid="adv-search-add-filter-description"]').click({ force: true, }); cy.get('[data-testid="edit-text-input"]').type("my hdfs dataset"); cy.get('[data-testid="edit-text-done-btn"]').click({ force: true }); cy.waitTextVisible(glossaryTerms.hdfsDataset); - cy.clickOptionWithText(glossaryTerms.hdfsDataset) + cy.clickOptionWithText(glossaryTerms.hdfsDataset); cy.waitTextVisible("my hdfs dataset"); }); @@ -99,4 +105,4 @@ describe("glossaryTerm", () => { cy.waitTextVisible(glossaryTerms.hdfsDataset); cy.waitTextVisible(glossaryTerms.hiveDataset); }); -}); \ No newline at end of file +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js index f6f1ff5949d25..553c0fb1626bc 100644 --- a/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js +++ b/smoke-test/tests/cypress/cypress/e2e/glossary/glossary_navigation.js @@ -10,17 +10,29 @@ const createTerm = (glossaryTerm) => { }; const navigateToParentAndCheckTermGroup = (parentGroup, termGroup) => { - cy.get('[data-testid="glossary-browser-sidebar"]').contains(parentGroup).click(); - cy.get('*[class^="GlossaryEntitiesList"]').contains(termGroup).should("be.visible"); + cy.get('[data-testid="glossary-browser-sidebar"]') + .contains(parentGroup) + .click(); + cy.get('*[class^="GlossaryEntitiesList"]') + .contains(termGroup) + .should("be.visible"); }; -const moveGlossaryEntityToGroup = (sourceEntity, targetEntity, confirmationMsg) => { +const moveGlossaryEntityToGroup = ( + sourceEntity, + targetEntity, + confirmationMsg, +) => { cy.clickOptionWithText(sourceEntity); - cy.get('[data-testid="entity-header-dropdown"]').should('be.visible'); + cy.get('[data-testid="entity-header-dropdown"]').should("be.visible"); cy.openThreeDotDropdown(); 
cy.clickOptionWithText("Move"); - cy.get('[data-testid="move-glossary-entity-modal"]').contains(targetEntity).click({ force: true }); - cy.get('[data-testid="move-glossary-entity-modal"]').contains(targetEntity).should("be.visible"); + cy.get('[data-testid="move-glossary-entity-modal"]') + .contains(targetEntity) + .click({ force: true }); + cy.get('[data-testid="move-glossary-entity-modal"]') + .contains(targetEntity) + .should("be.visible"); cy.clickOptionWithTestId("glossary-entity-modal-move-button"); cy.waitTextVisible(confirmationMsg); }; @@ -42,7 +54,11 @@ describe("glossary sidebar navigation test", () => { cy.createGlossaryTermGroup(glossaryTermGroup); cy.clickOptionWithTestId("add-term-button"); createTerm(glossaryTerm); - moveGlossaryEntityToGroup(glossaryTerm, glossaryTermGroup, `Moved Glossary Term!`); + moveGlossaryEntityToGroup( + glossaryTerm, + glossaryTermGroup, + `Moved Glossary Term!`, + ); navigateToParentAndCheckTermGroup(glossaryTermGroup, glossaryTerm); // Create another term and move it to the same term group @@ -50,24 +66,42 @@ describe("glossary sidebar navigation test", () => { cy.openThreeDotDropdown(); cy.clickOptionWithTestId("entity-menu-add-term-button"); createTerm(glossarySecondTerm); - moveGlossaryEntityToGroup(glossarySecondTerm, glossaryTermGroup, `Moved Glossary Term!`); + moveGlossaryEntityToGroup( + glossarySecondTerm, + glossaryTermGroup, + `Moved Glossary Term!`, + ); navigateToParentAndCheckTermGroup(glossaryTermGroup, glossarySecondTerm); // Switch between terms and ensure the "Properties" tab is active cy.clickOptionWithText(glossaryTerm); - cy.get('[data-testid="entity-tab-headers-test-id"]').contains("Properties").click({ force: true }); - cy.get('[data-node-key="Properties"]').contains("Properties").should("have.attr", "aria-selected", "true"); + cy.get('[data-testid="entity-tab-headers-test-id"]') + .contains("Properties") + .click({ force: true }); + cy.get('[data-node-key="Properties"]') + .contains("Properties") 
+ .should("have.attr", "aria-selected", "true"); cy.clickOptionWithText(glossarySecondTerm); - cy.get('[data-node-key="Properties"]').contains("Properties").should("have.attr", "aria-selected", "true"); + cy.get('[data-node-key="Properties"]') + .contains("Properties") + .should("have.attr", "aria-selected", "true"); // Move a term group from the root level to be under a parent term group cy.goToGlossaryList(); - moveGlossaryEntityToGroup(glossaryTermGroup, glossaryParentGroup, 'Moved Term Group!'); + moveGlossaryEntityToGroup( + glossaryTermGroup, + glossaryParentGroup, + "Moved Term Group!", + ); navigateToParentAndCheckTermGroup(glossaryParentGroup, glossaryTermGroup); // Delete glossary terms and term group deleteGlossaryTerm(glossaryParentGroup, glossaryTermGroup, glossaryTerm); - deleteGlossaryTerm(glossaryParentGroup, glossaryTermGroup, glossarySecondTerm); + deleteGlossaryTerm( + glossaryParentGroup, + glossaryTermGroup, + glossarySecondTerm, + ); cy.goToGlossaryList(); cy.clickOptionWithText(glossaryParentGroup); diff --git a/smoke-test/tests/cypress/cypress/e2e/home/home.js b/smoke-test/tests/cypress/cypress/e2e/home/home.js index 05140486e189b..8b40cfaae41af 100644 --- a/smoke-test/tests/cypress/cypress/e2e/home/home.js +++ b/smoke-test/tests/cypress/cypress/e2e/home/home.js @@ -1,39 +1,44 @@ import { aliasQuery, hasOperationName } from "../utils"; -describe('home', () => { - let businessAttributeEntityEnabled; +describe("home", () => { + let businessAttributeEntityEnabled; - beforeEach(() => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - aliasQuery(req, "appConfig"); - }); + beforeEach(() => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); }); - - const setBusinessAttributeFeatureFlag = () => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - if (hasOperationName(req, "appConfig")) { - req.reply((res) => { - businessAttributeEntityEnabled = 
res.body.data.appConfig.featureFlags.businessAttributeEntityEnabled; - return res; - }); - } - }).as('apiCall'); - }; - it('home page shows ', () => { - setBusinessAttributeFeatureFlag(); - cy.login(); - cy.visit('/'); - // cy.get('img[src="/assets/platforms/datahublogo.png"]').should('exist'); - cy.get('[data-testid="entity-type-browse-card-DATASET"]').should('exist'); - cy.get('[data-testid="entity-type-browse-card-DASHBOARD"]').should('exist'); - cy.get('[data-testid="entity-type-browse-card-CHART"]').should('exist'); - cy.get('[data-testid="entity-type-browse-card-DATA_FLOW"]').should('exist'); - cy.get('[data-testid="entity-type-browse-card-GLOSSARY_TERM"]').should('exist'); - cy.wait('@apiCall').then(() => { - if (!businessAttributeEntityEnabled) { - return; - } - cy.get('[data-testid="entity-type-browse-card-BUSINESS_ATTRIBUTE"]').should('exist'); - }); + }); + + const setBusinessAttributeFeatureFlag = () => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + if (hasOperationName(req, "appConfig")) { + req.reply((res) => { + businessAttributeEntityEnabled = + res.body.data.appConfig.featureFlags.businessAttributeEntityEnabled; + return res; + }); + } + }).as("apiCall"); + }; + it("home page shows ", () => { + setBusinessAttributeFeatureFlag(); + cy.login(); + cy.visit("/"); + // cy.get('img[src="/assets/platforms/datahublogo.png"]').should('exist'); + cy.get('[data-testid="entity-type-browse-card-DATASET"]').should("exist"); + cy.get('[data-testid="entity-type-browse-card-DASHBOARD"]').should("exist"); + cy.get('[data-testid="entity-type-browse-card-CHART"]').should("exist"); + cy.get('[data-testid="entity-type-browse-card-DATA_FLOW"]').should("exist"); + cy.get('[data-testid="entity-type-browse-card-GLOSSARY_TERM"]').should( + "exist", + ); + cy.wait("@apiCall").then(() => { + if (!businessAttributeEntityEnabled) { + return; + } + cy.get( + '[data-testid="entity-type-browse-card-BUSINESS_ATTRIBUTE"]', + ).should("exist"); }); - }) + }); +}); diff --git 
a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js index ed4167b87c506..325902cbc91ab 100644 --- a/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/download_lineage_results.js @@ -1,83 +1,90 @@ -const test_dataset = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"; +const test_dataset = + "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"; const first_degree = [ - "urn:li:chart:(looker,cypress_baz1)", - "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)", - "urn:li:mlFeature:(cypress-test-2,some-cypress-feature-1)" + "urn:li:chart:(looker,cypress_baz1)", + "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)", + "urn:li:mlFeature:(cypress-test-2,some-cypress-feature-1)", ]; const second_degree = [ - "urn:li:chart:(looker,cypress_baz2)", - "urn:li:dashboard:(looker,cypress_baz)", - "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", - "urn:li:mlPrimaryKey:(cypress-test-2,some-cypress-feature-2)" + "urn:li:chart:(looker,cypress_baz2)", + "urn:li:dashboard:(looker,cypress_baz)", + "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", + "urn:li:mlPrimaryKey:(cypress-test-2,some-cypress-feature-2)", ]; const third_degree_plus = [ - "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_123)", - "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_456)", - "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created_no_tag,PROD)", - "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD)" + 
"urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_123)", + "urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_456)", + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_created_no_tag,PROD)", + "urn:li:dataset:(urn:li:dataPlatform:hive,fct_cypress_users_deleted,PROD)", ]; const downloadCsvFile = (filename) => { - cy.get('[data-testid="three-dot-menu"]').click(); - cy.get('[data-testid="download-as-csv-menu-item"]').click(); - cy.get('[data-testid="download-as-csv-input"]').clear().type(filename); - cy.get('[data-testid="csv-modal-download-button"]').click().wait(5000); - cy.ensureTextNotPresent("Creating CSV to download"); + cy.get('[data-testid="three-dot-menu"]').click(); + cy.get('[data-testid="download-as-csv-menu-item"]').click(); + cy.get('[data-testid="download-as-csv-input"]').clear().type(filename); + cy.get('[data-testid="csv-modal-download-button"]').click().wait(5000); + cy.ensureTextNotPresent("Creating CSV to download"); }; describe("download lineage results to .csv file", () => { - beforeEach(() => { - cy.on('uncaught:exception', (err, runnable) => { return false; }); - }); + beforeEach(() => { + cy.on("uncaught:exception", (err, runnable) => false); + }); - it("download and verify lineage results for 1st, 2nd and 3+ degree of dependencies", () => { - cy.loginWithCredentials(); - cy.goToDataset(test_dataset,"SampleCypressKafkaDataset"); - cy.openEntityTab("Lineage"); + it("download and verify lineage results for 1st, 2nd and 3+ degree of dependencies", () => { + cy.loginWithCredentials(); + cy.goToDataset(test_dataset, "SampleCypressKafkaDataset"); + cy.openEntityTab("Lineage"); - // Verify 1st degree of dependencies - cy.contains(/1 - [3-4] of [3-4]/); - downloadCsvFile("first_degree_results.csv"); - let first_degree_csv = 
cy.readFile('cypress/downloads/first_degree_results.csv'); - first_degree.forEach(function (urn) { - first_degree_csv.should('contain', urn) - }); - second_degree.forEach(function (urn) { - first_degree_csv.should('not.contain', urn) - }); - third_degree_plus.forEach(function (urn) { - first_degree_csv.should('not.contain', urn); - }); + // Verify 1st degree of dependencies + cy.contains(/1 - [3-4] of [3-4]/); + downloadCsvFile("first_degree_results.csv"); + const first_degree_csv = cy.readFile( + "cypress/downloads/first_degree_results.csv", + ); + first_degree.forEach((urn) => { + first_degree_csv.should("contain", urn); + }); + second_degree.forEach((urn) => { + first_degree_csv.should("not.contain", urn); + }); + third_degree_plus.forEach((urn) => { + first_degree_csv.should("not.contain", urn); + }); - // Verify 1st and 2nd degree of dependencies - cy.get('[data-testid="facet-degree-2"]').click().wait(5000); - cy.contains(/1 - [7-8] of [7-8]/); - downloadCsvFile("second_degree_results.csv"); - let second_degree_csv = cy.readFile('cypress/downloads/second_degree_results.csv'); - first_degree.forEach(function (urn) { - second_degree_csv.should('contain', urn) - }); - second_degree.forEach(function (urn) { - second_degree_csv.should('contain', urn) - }); - third_degree_plus.forEach(function (urn) { - second_degree_csv.should('not.contain', urn); - }); + // Verify 1st and 2nd degree of dependencies + cy.get('[data-testid="facet-degree-2"]').click().wait(5000); + cy.contains(/1 - [7-8] of [7-8]/); + downloadCsvFile("second_degree_results.csv"); + const second_degree_csv = cy.readFile( + "cypress/downloads/second_degree_results.csv", + ); + first_degree.forEach((urn) => { + second_degree_csv.should("contain", urn); + }); + second_degree.forEach((urn) => { + second_degree_csv.should("contain", urn); + }); + third_degree_plus.forEach((urn) => { + second_degree_csv.should("not.contain", urn); + }); - // Verify 1st 2nd and 3+ degree of dependencies(Verify multi page 
download) - cy.get('[data-testid="facet-degree-3+"]').click().wait(5000); - cy.contains(/1 - 10 of 1[3-4]/); - downloadCsvFile("third_plus_degree_results.csv"); - let third_degree_csv = cy.readFile('cypress/downloads/third_plus_degree_results.csv'); - first_degree.forEach(function (urn) { - third_degree_csv.should('contain', urn) - }); - second_degree.forEach(function (urn) { - third_degree_csv.should('contain', urn) - }); - third_degree_plus.forEach(function (urn) { - third_degree_csv.should('contain', urn); - }); + // Verify 1st 2nd and 3+ degree of dependencies(Verify multi page download) + cy.get('[data-testid="facet-degree-3+"]').click().wait(5000); + cy.contains(/1 - 10 of 1[3-4]/); + downloadCsvFile("third_plus_degree_results.csv"); + const third_degree_csv = cy.readFile( + "cypress/downloads/third_plus_degree_results.csv", + ); + first_degree.forEach((urn) => { + third_degree_csv.should("contain", urn); + }); + second_degree.forEach((urn) => { + third_degree_csv.should("contain", urn); + }); + third_degree_plus.forEach((urn) => { + third_degree_csv.should("contain", urn); }); -}); \ No newline at end of file + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/impact_analysis.js b/smoke-test/tests/cypress/cypress/e2e/lineage/impact_analysis.js index 784ccf8f0f87d..1b4367fcb8bbc 100644 --- a/smoke-test/tests/cypress/cypress/e2e/lineage/impact_analysis.js +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/impact_analysis.js @@ -2,38 +2,38 @@ import { getTimestampMillisNumDaysAgo } from "../../support/commands"; const JAN_1_2021_TIMESTAMP = 1609553357755; const JAN_1_2022_TIMESTAMP = 1641089357755; -const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)'; +const DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"; const TIMESTAMP_MILLIS_14_DAYS_AGO = getTimestampMillisNumDaysAgo(14); const TIMESTAMP_MILLIS_7_DAYS_AGO = getTimestampMillisNumDaysAgo(7); const 
TIMESTAMP_MILLIS_NOW = getTimestampMillisNumDaysAgo(0); -const GNP_DATASET_URN = "urn:li:dataset:(urn:li:dataPlatform:snowflake,economic_data.gnp,PROD)"; -const TRANSACTION_ETL_URN = "urn:li:dataJob:(urn:li:dataFlow:(airflow,bq_etl,prod),transaction_etl)"; -const MONTHLY_TEMPERATURE_DATASET_URN = "urn:li:dataset:(urn:li:dataPlatform:snowflake,climate.monthly_temperature,PROD)"; - +const GNP_DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:snowflake,economic_data.gnp,PROD)"; +const TRANSACTION_ETL_URN = + "urn:li:dataJob:(urn:li:dataFlow:(airflow,bq_etl,prod),transaction_etl)"; +const MONTHLY_TEMPERATURE_DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:snowflake,climate.monthly_temperature,PROD)"; const startAtDataSetLineage = () => { - cy.login(); - cy.goToDataset( - DATASET_URN, - "SampleCypressKafkaDataset" - ); - cy.openEntityTab("Lineage") -} + cy.login(); + cy.goToDataset(DATASET_URN, "SampleCypressKafkaDataset"); + cy.openEntityTab("Lineage"); +}; describe("impact analysis", () => { beforeEach(() => { - cy.on('uncaught:exception', (err, runnable) => { return false; }); + cy.on("uncaught:exception", (err, runnable) => false); }); it("can see 1 hop of lineage by default", () => { - startAtDataSetLineage() + startAtDataSetLineage(); cy.ensureTextNotPresent("User Creations"); cy.ensureTextNotPresent("User Deletions"); }); it("can see lineage multiple hops away", () => { - startAtDataSetLineage() + startAtDataSetLineage(); // click to show more relationships now that we default to 1 degree of dependency cy.clickOptionWithText("3+"); @@ -42,7 +42,7 @@ describe("impact analysis", () => { }); it("can filter the lineage results as well", () => { - startAtDataSetLineage() + startAtDataSetLineage(); // click to show more relationships now that we default to 1 degree of dependency cy.clickOptionWithText("3+"); @@ -50,11 +50,11 @@ describe("impact analysis", () => { cy.clickOptionWithText("Add Filter"); - 
cy.clickOptionWithTestId('adv-search-add-filter-description'); + cy.clickOptionWithTestId("adv-search-add-filter-description"); cy.get('[data-testid="edit-text-input"]').type("fct_users_deleted"); - cy.clickOptionWithTestId('edit-text-done-btn'); + cy.clickOptionWithTestId("edit-text-done-btn"); cy.ensureTextNotPresent("User Creations"); cy.waitTextVisible("User Deletions"); @@ -63,7 +63,7 @@ describe("impact analysis", () => { it("can view column level impact analysis and turn it off", () => { cy.login(); cy.visit( - `/dataset/${DATASET_URN}/Lineage?column=%5Bversion%3D2.0%5D.%5Btype%3Dboolean%5D.field_bar&is_lineage_mode=false` + `/dataset/${DATASET_URN}/Lineage?column=%5Bversion%3D2.0%5D.%5Btype%3Dboolean%5D.field_bar&is_lineage_mode=false`, ); // impact analysis can take a beat- don't want to time out here @@ -85,11 +85,10 @@ describe("impact analysis", () => { cy.contains("Baz Chart 1"); }); - it("can filter lineage edges by time", () => { cy.login(); cy.visit( - `/dataset/${DATASET_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${JAN_1_2021_TIMESTAMP}&end_time_millis=${JAN_1_2022_TIMESTAMP}` + `/dataset/${DATASET_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${JAN_1_2021_TIMESTAMP}&end_time_millis=${JAN_1_2022_TIMESTAMP}`, ); // impact analysis can take a beat- don't want to time out here @@ -105,7 +104,7 @@ describe("impact analysis", () => { cy.login(); // Between 14 days ago and 7 days ago, only transactions was an input cy.visit( - `/tasks/${TRANSACTION_ETL_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_14_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_7_DAYS_AGO}` + 
`/tasks/${TRANSACTION_ETL_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_14_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_7_DAYS_AGO}`, ); // Downstream cy.contains("aggregated"); @@ -115,7 +114,7 @@ describe("impact analysis", () => { cy.contains("user_profile").should("not.exist"); // 1 day ago, factor_income was removed from the join cy.visit( - `/tasks/${TRANSACTION_ETL_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_7_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_NOW}` + `/tasks/${TRANSACTION_ETL_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_7_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_NOW}`, ); // Downstream cy.contains("aggregated"); @@ -129,14 +128,14 @@ describe("impact analysis", () => { cy.login(); // Between 14 days ago and 7 days ago, only temperature_etl_1 was an iput cy.visit( - `/dataset/${MONTHLY_TEMPERATURE_DATASET_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_14_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_7_DAYS_AGO}` + `/dataset/${MONTHLY_TEMPERATURE_DATASET_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_14_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_7_DAYS_AGO}`, ); cy.lineageTabClickOnUpstream(); cy.contains("temperature_etl_1"); cy.contains("temperature_etl_2").should("not.exist"); // Since 7 days ago, temperature_etl_1 has been replaced by temperature_etl_2 cy.visit( - `/dataset/${MONTHLY_TEMPERATURE_DATASET_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_7_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_NOW}` + 
`/dataset/${MONTHLY_TEMPERATURE_DATASET_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_7_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_NOW}`, ); cy.lineageTabClickOnUpstream(); cy.contains("temperature_etl_1").should("not.exist"); @@ -147,14 +146,14 @@ describe("impact analysis", () => { cy.login(); // 8 days ago, both gdp and factor_income were joined to create gnp cy.visit( - `/dataset/${GNP_DATASET_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_14_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_NOW}` + `/dataset/${GNP_DATASET_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_14_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_NOW}`, ); cy.lineageTabClickOnUpstream(); cy.contains("gdp"); cy.contains("factor_income"); // 1 day ago, factor_income was removed from the join cy.visit( - `/dataset/${GNP_DATASET_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_7_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_NOW}` + `/dataset/${GNP_DATASET_URN}/Lineage?filter_degree___false___EQUAL___0=1&is_lineage_mode=false&page=1&unionType=0&start_time_millis=${TIMESTAMP_MILLIS_7_DAYS_AGO}&end_time_millis=${TIMESTAMP_MILLIS_NOW}`, ); cy.lineageTabClickOnUpstream(); cy.contains("gdp"); diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js index 2a8fe045f154e..cedf3c6c051be 100644 --- a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_level.js @@ -1,51 +1,50 @@ -const DATASET_ENTITY_TYPE = 'dataset'; -const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)'; +const 
DATASET_ENTITY_TYPE = "dataset"; +const DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)"; describe("column-level lineage graph test", () => { - - it("navigate to lineage graph view and verify that column-level lineage is showing correctly", () => { - cy.login(); - cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN); - //verify columns not shown by default - cy.waitTextVisible("SampleCypressHdfs"); - cy.waitTextVisible("SampleCypressHive"); - cy.waitTextVisible("cypress_logging"); - cy.ensureTextNotPresent("shipment_info"); - cy.ensureTextNotPresent("field_foo"); - cy.ensureTextNotPresent("field_baz"); - cy.ensureTextNotPresent("event_name"); - cy.ensureTextNotPresent("event_data"); - cy.ensureTextNotPresent("timestamp"); - cy.ensureTextNotPresent("browser"); - cy.clickOptionWithTestId("column-toggle") - //verify columns appear and belong co correct dataset - cy.waitTextVisible("shipment_info"); - cy.waitTextVisible("shipment_info.date"); - cy.waitTextVisible("shipment_info.target"); - cy.waitTextVisible("shipment_info.destination"); - cy.waitTextVisible("shipment_info.geo_info"); - cy.waitTextVisible("field_foo"); - cy.waitTextVisible("field_baz"); - cy.waitTextVisible("event_name"); - cy.waitTextVisible("event_data"); - cy.waitTextVisible("timestamp"); - cy.waitTextVisible("browser"); - //verify columns can be hidden and shown again - cy.contains("Hide").click({ force:true }); - cy.ensureTextNotPresent("field_foo"); - cy.ensureTextNotPresent("field_baz"); - cy.get("[aria-label='down']").eq(1).click({ force:true }); - cy.waitTextVisible("field_foo"); - cy.waitTextVisible("field_baz"); - //verify columns can be disabled successfully - cy.clickOptionWithTestId("column-toggle") - cy.ensureTextNotPresent("shipment_info"); - cy.ensureTextNotPresent("field_foo"); - cy.ensureTextNotPresent("field_baz"); - cy.ensureTextNotPresent("event_name"); - cy.ensureTextNotPresent("event_data"); - cy.ensureTextNotPresent("timestamp"); 
- cy.ensureTextNotPresent("browser"); - }); - -}); \ No newline at end of file + it("navigate to lineage graph view and verify that column-level lineage is showing correctly", () => { + cy.login(); + cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN); + // verify columns not shown by default + cy.waitTextVisible("SampleCypressHdfs"); + cy.waitTextVisible("SampleCypressHive"); + cy.waitTextVisible("cypress_logging"); + cy.ensureTextNotPresent("shipment_info"); + cy.ensureTextNotPresent("field_foo"); + cy.ensureTextNotPresent("field_baz"); + cy.ensureTextNotPresent("event_name"); + cy.ensureTextNotPresent("event_data"); + cy.ensureTextNotPresent("timestamp"); + cy.ensureTextNotPresent("browser"); + cy.clickOptionWithTestId("column-toggle"); + // verify columns appear and belong co correct dataset + cy.waitTextVisible("shipment_info"); + cy.waitTextVisible("shipment_info.date"); + cy.waitTextVisible("shipment_info.target"); + cy.waitTextVisible("shipment_info.destination"); + cy.waitTextVisible("shipment_info.geo_info"); + cy.waitTextVisible("field_foo"); + cy.waitTextVisible("field_baz"); + cy.waitTextVisible("event_name"); + cy.waitTextVisible("event_data"); + cy.waitTextVisible("timestamp"); + cy.waitTextVisible("browser"); + // verify columns can be hidden and shown again + cy.contains("Hide").click({ force: true }); + cy.ensureTextNotPresent("field_foo"); + cy.ensureTextNotPresent("field_baz"); + cy.get("[aria-label='down']").eq(1).click({ force: true }); + cy.waitTextVisible("field_foo"); + cy.waitTextVisible("field_baz"); + // verify columns can be disabled successfully + cy.clickOptionWithTestId("column-toggle"); + cy.ensureTextNotPresent("shipment_info"); + cy.ensureTextNotPresent("field_foo"); + cy.ensureTextNotPresent("field_baz"); + cy.ensureTextNotPresent("event_name"); + cy.ensureTextNotPresent("event_data"); + cy.ensureTextNotPresent("timestamp"); + cy.ensureTextNotPresent("browser"); + }); +}); diff --git 
a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js index 37ca62c8d1229..7691b04d91f32 100644 --- a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_column_path.js @@ -1,68 +1,89 @@ import { aliasQuery } from "../utils"; -const DATASET_ENTITY_TYPE = 'dataset'; -const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)'; -const DOWNSTREAM_DATASET_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"; -const upstreamColumn = '[data-testid="node-urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)-Upstream"] text'; -const downstreamColumn = '[data-testid="node-urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)-Downstream"] text'; + +const DATASET_ENTITY_TYPE = "dataset"; +const DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)"; +const DOWNSTREAM_DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"; +const upstreamColumn = + '[data-testid="node-urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)-Upstream"] text'; +const downstreamColumn = + '[data-testid="node-urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)-Downstream"] text'; const verifyColumnPathModal = (from, to) => { - cy.get('[data-testid="entity-paths-modal"]').contains(from).should("be.visible"); - cy.get('[data-testid="entity-paths-modal"]').contains(to).should("be.visible"); + cy.get('[data-testid="entity-paths-modal"]') + .contains(from) + .should("be.visible"); + cy.get('[data-testid="entity-paths-modal"]') + .contains(to) + .should("be.visible"); }; describe("column-Level lineage and impact analysis path test", () => { - beforeEach(() => { - cy.on('uncaught:exception', (err, runnable) => { return false; }); - 
cy.intercept("POST", "/api/v2/graphql", (req) => { - aliasQuery(req, "appConfig"); - }); - }); + beforeEach(() => { + cy.on("uncaught:exception", (err, runnable) => false); + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); + }); + }); - it("verify column-level lineage path at lineage praph and impact analysis ", () => { - // Open dataset with column-level lineage configured an navigate to lineage tab -> visualize lineage - cy.loginWithCredentials(); - cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN); + it("verify column-level lineage path at lineage praph and impact analysis ", () => { + // Open dataset with column-level lineage configured an navigate to lineage tab -> visualize lineage + cy.loginWithCredentials(); + cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN); - // Enable “show columns” toggle - cy.waitTextVisible("SampleCypressHdfs"); - cy.clickOptionWithTestId("column-toggle"); - cy.waitTextVisible("shipment_info"); + // Enable “show columns” toggle + cy.waitTextVisible("SampleCypressHdfs"); + cy.clickOptionWithTestId("column-toggle"); + cy.waitTextVisible("shipment_info"); - // Verify functionality of column lineage - cy.get(upstreamColumn).eq(3).click(); - cy.get(upstreamColumn).eq(3).prev().should('not.have.attr', 'fill', 'white'); - cy.get(downstreamColumn).eq(2).prev().should('not.have.attr', 'stroke', 'transparent'); - cy.get(downstreamColumn).eq(2).click(); - cy.get(downstreamColumn).eq(2).prev().should('not.have.attr', 'fill', 'white'); - cy.get(upstreamColumn).eq(3).prev().should('not.have.attr', 'stroke', 'transparent'); + // Verify functionality of column lineage + cy.get(upstreamColumn).eq(3).click(); + cy.get(upstreamColumn) + .eq(3) + .prev() + .should("not.have.attr", "fill", "white"); + cy.get(downstreamColumn) + .eq(2) + .prev() + .should("not.have.attr", "stroke", "transparent"); + cy.get(downstreamColumn).eq(2).click(); + cy.get(downstreamColumn) + .eq(2) + .prev() + 
.should("not.have.attr", "fill", "white"); + cy.get(upstreamColumn) + .eq(3) + .prev() + .should("not.have.attr", "stroke", "transparent"); - // Open dataset impact analysis view, enable column lineage - cy.goToDataset(DATASET_URN, "SampleCypressHdfsDataset"); - cy.openEntityTab("Lineage"); - cy.clickOptionWithText("Column Lineage"); - cy.clickOptionWithText("Downstream"); + // Open dataset impact analysis view, enable column lineage + cy.goToDataset(DATASET_URN, "SampleCypressHdfsDataset"); + cy.openEntityTab("Lineage"); + cy.clickOptionWithText("Column Lineage"); + cy.clickOptionWithText("Downstream"); - // Verify upstream column lineage, test column path modal - cy.clickOptionWithText("Upstream"); - cy.waitTextVisible("SampleCypressKafkaDataset"); - cy.ensureTextNotPresent("field_bar"); - cy.contains("Select column").click({ force: true}).wait(1000); - cy.get(".rc-virtual-list").contains("shipment_info").click(); - cy.waitTextVisible("field_bar"); - cy.clickOptionWithText("field_bar"); - verifyColumnPathModal("shipment_info", "field_bar"); - cy.get('[data-testid="entity-paths-modal"] [data-icon="close"]').click(); - - // Verify downstream column lineage, test column path modal - cy.goToDataset(DOWNSTREAM_DATASET_URN, "SampleCypressKafkaDataset"); - cy.openEntityTab("Lineage"); - cy.clickOptionWithText("Column Lineage"); - cy.ensureTextNotPresent("shipment_info"); - cy.contains("Select column").click({ force: true}).wait(1000); - cy.get(".rc-virtual-list").contains("field_bar").click(); - cy.waitTextVisible("shipment_info"); - cy.clickOptionWithText("shipment_info"); - verifyColumnPathModal("shipment_info", "field_bar"); - cy.get('[data-testid="entity-paths-modal"] [data-icon="close"]').click(); - }); -}); \ No newline at end of file + // Verify upstream column lineage, test column path modal + cy.clickOptionWithText("Upstream"); + cy.waitTextVisible("SampleCypressKafkaDataset"); + cy.ensureTextNotPresent("field_bar"); + cy.contains("Select column").click({ 
force: true }).wait(1000); + cy.get(".rc-virtual-list").contains("shipment_info").click(); + cy.waitTextVisible("field_bar"); + cy.clickOptionWithText("field_bar"); + verifyColumnPathModal("shipment_info", "field_bar"); + cy.get('[data-testid="entity-paths-modal"] [data-icon="close"]').click(); + + // Verify downstream column lineage, test column path modal + cy.goToDataset(DOWNSTREAM_DATASET_URN, "SampleCypressKafkaDataset"); + cy.openEntityTab("Lineage"); + cy.clickOptionWithText("Column Lineage"); + cy.ensureTextNotPresent("shipment_info"); + cy.contains("Select column").click({ force: true }).wait(1000); + cy.get(".rc-virtual-list").contains("field_bar").click(); + cy.waitTextVisible("shipment_info"); + cy.clickOptionWithText("shipment_info"); + verifyColumnPathModal("shipment_info", "field_bar"); + cy.get('[data-testid="entity-paths-modal"] [data-icon="close"]').click(); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_graph.js b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_graph.js index 85db210649c27..e66afdda11ca4 100644 --- a/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_graph.js +++ b/smoke-test/tests/cypress/cypress/e2e/lineage/lineage_graph.js @@ -1,80 +1,118 @@ import { getTimestampMillisNumDaysAgo } from "../../support/commands"; -const DATASET_ENTITY_TYPE = 'dataset'; -const TASKS_ENTITY_TYPE = 'tasks'; -const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)'; +const DATASET_ENTITY_TYPE = "dataset"; +const TASKS_ENTITY_TYPE = "tasks"; +const DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD)"; const JAN_1_2021_TIMESTAMP = 1609553357755; const JAN_1_2022_TIMESTAMP = 1641089357755; const TIMESTAMP_MILLIS_14_DAYS_AGO = getTimestampMillisNumDaysAgo(14); const TIMESTAMP_MILLIS_7_DAYS_AGO = getTimestampMillisNumDaysAgo(7); const TIMESTAMP_MILLIS_NOW = getTimestampMillisNumDaysAgo(0); -const GNP_DATASET_URN = 
"urn:li:dataset:(urn:li:dataPlatform:snowflake,economic_data.gnp,PROD)"; -const TRANSACTION_ETL_URN = "urn:li:dataJob:(urn:li:dataFlow:(airflow,bq_etl,prod),transaction_etl)"; -const MONTHLY_TEMPERATURE_DATASET_URN = "urn:li:dataset:(urn:li:dataPlatform:snowflake,climate.monthly_temperature,PROD)"; +const GNP_DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:snowflake,economic_data.gnp,PROD)"; +const TRANSACTION_ETL_URN = + "urn:li:dataJob:(urn:li:dataFlow:(airflow,bq_etl,prod),transaction_etl)"; +const MONTHLY_TEMPERATURE_DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:snowflake,climate.monthly_temperature,PROD)"; describe("lineage_graph", () => { - it("can see full history", () => { - cy.login(); - cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN); + it("can see full history", () => { + cy.login(); + cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN); - cy.contains("SampleCypressKafka"); - cy.contains("SampleCypressHdfs"); - cy.contains("Baz Chart 1"); - cy.contains("some-cypress"); - }); + cy.contains("SampleCypressKafka"); + cy.contains("SampleCypressHdfs"); + cy.contains("Baz Chart 1"); + cy.contains("some-cypress"); + }); - it("cannot see any lineage edges for 2021", () => { - cy.login(); - cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, DATASET_URN, JAN_1_2021_TIMESTAMP, JAN_1_2022_TIMESTAMP); + it("cannot see any lineage edges for 2021", () => { + cy.login(); + cy.goToEntityLineageGraph( + DATASET_ENTITY_TYPE, + DATASET_URN, + JAN_1_2021_TIMESTAMP, + JAN_1_2022_TIMESTAMP, + ); - cy.contains("SampleCypressKafka"); - cy.contains("SampleCypressHdfs").should("not.exist"); - cy.contains("Baz Chart 1").should("not.exist"); - cy.contains("some-cypress").should("not.exist"); - }); + cy.contains("SampleCypressKafka"); + cy.contains("SampleCypressHdfs").should("not.exist"); + cy.contains("Baz Chart 1").should("not.exist"); + cy.contains("some-cypress").should("not.exist"); + }); - it("can see when the inputs to a data job change", () => { - 
cy.login(); - // Between 14 days ago and 7 days ago, only transactions was an input - cy.goToEntityLineageGraph(TASKS_ENTITY_TYPE, TRANSACTION_ETL_URN, TIMESTAMP_MILLIS_14_DAYS_AGO, TIMESTAMP_MILLIS_7_DAYS_AGO); - cy.contains("transaction_etl"); - cy.contains("aggregated"); - cy.contains("transactions"); - cy.contains("user_profile").should("not.exist"); - // 1 day ago, user_profile was also added as an input - cy.goToEntityLineageGraph(TASKS_ENTITY_TYPE, TRANSACTION_ETL_URN, TIMESTAMP_MILLIS_7_DAYS_AGO, TIMESTAMP_MILLIS_NOW); - cy.contains("transaction_etl"); - cy.contains("aggregated"); - cy.contains("transactions"); - cy.contains("user_profile"); - }); - - it("can see when a data job is replaced", () => { - cy.login(); - // Between 14 days ago and 7 days ago, only temperature_etl_1 was an iput - cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, MONTHLY_TEMPERATURE_DATASET_URN, TIMESTAMP_MILLIS_14_DAYS_AGO, TIMESTAMP_MILLIS_7_DAYS_AGO); - cy.contains("monthly_temperature"); - cy.contains("temperature_etl_1"); - cy.contains("temperature_etl_2").should("not.exist"); - // Since 7 days ago, temperature_etl_1 has been replaced by temperature_etl_2 - cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, MONTHLY_TEMPERATURE_DATASET_URN, TIMESTAMP_MILLIS_7_DAYS_AGO, TIMESTAMP_MILLIS_NOW); - cy.contains("monthly_temperature"); - cy.contains("temperature_etl_1").should("not.exist"); - cy.contains("temperature_etl_2"); - }); + it("can see when the inputs to a data job change", () => { + cy.login(); + // Between 14 days ago and 7 days ago, only transactions was an input + cy.goToEntityLineageGraph( + TASKS_ENTITY_TYPE, + TRANSACTION_ETL_URN, + TIMESTAMP_MILLIS_14_DAYS_AGO, + TIMESTAMP_MILLIS_7_DAYS_AGO, + ); + cy.contains("transaction_etl"); + cy.contains("aggregated"); + cy.contains("transactions"); + cy.contains("user_profile").should("not.exist"); + // 1 day ago, user_profile was also added as an input + cy.goToEntityLineageGraph( + TASKS_ENTITY_TYPE, + TRANSACTION_ETL_URN, + 
TIMESTAMP_MILLIS_7_DAYS_AGO, + TIMESTAMP_MILLIS_NOW, + ); + cy.contains("transaction_etl"); + cy.contains("aggregated"); + cy.contains("transactions"); + cy.contains("user_profile"); + }); + + it("can see when a data job is replaced", () => { + cy.login(); + // Between 14 days ago and 7 days ago, only temperature_etl_1 was an iput + cy.goToEntityLineageGraph( + DATASET_ENTITY_TYPE, + MONTHLY_TEMPERATURE_DATASET_URN, + TIMESTAMP_MILLIS_14_DAYS_AGO, + TIMESTAMP_MILLIS_7_DAYS_AGO, + ); + cy.contains("monthly_temperature"); + cy.contains("temperature_etl_1"); + cy.contains("temperature_etl_2").should("not.exist"); + // Since 7 days ago, temperature_etl_1 has been replaced by temperature_etl_2 + cy.goToEntityLineageGraph( + DATASET_ENTITY_TYPE, + MONTHLY_TEMPERATURE_DATASET_URN, + TIMESTAMP_MILLIS_7_DAYS_AGO, + TIMESTAMP_MILLIS_NOW, + ); + cy.contains("monthly_temperature"); + cy.contains("temperature_etl_1").should("not.exist"); + cy.contains("temperature_etl_2"); + }); - it("can see when a dataset join changes", () => { - cy.login(); - // 8 days ago, both gdp and factor_income were joined to create gnp - cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, GNP_DATASET_URN, TIMESTAMP_MILLIS_14_DAYS_AGO, TIMESTAMP_MILLIS_NOW); - cy.contains("gnp"); - cy.contains("gdp"); - cy.contains("factor_income"); - // 1 day ago, factor_income was removed from the join - cy.goToEntityLineageGraph(DATASET_ENTITY_TYPE, GNP_DATASET_URN, TIMESTAMP_MILLIS_7_DAYS_AGO, TIMESTAMP_MILLIS_NOW); - cy.contains("gnp"); - cy.contains("gdp"); - cy.contains("factor_income").should("not.exist"); - }); + it("can see when a dataset join changes", () => { + cy.login(); + // 8 days ago, both gdp and factor_income were joined to create gnp + cy.goToEntityLineageGraph( + DATASET_ENTITY_TYPE, + GNP_DATASET_URN, + TIMESTAMP_MILLIS_14_DAYS_AGO, + TIMESTAMP_MILLIS_NOW, + ); + cy.contains("gnp"); + cy.contains("gdp"); + cy.contains("factor_income"); + // 1 day ago, factor_income was removed from the join + 
cy.goToEntityLineageGraph( + DATASET_ENTITY_TYPE, + GNP_DATASET_URN, + TIMESTAMP_MILLIS_7_DAYS_AGO, + TIMESTAMP_MILLIS_NOW, + ); + cy.contains("gnp"); + cy.contains("gdp"); + cy.contains("factor_income").should("not.exist"); }); - \ No newline at end of file +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/login/login.js b/smoke-test/tests/cypress/cypress/e2e/login/login.js index cfeb2619593ff..ad11e80a0caed 100644 --- a/smoke-test/tests/cypress/cypress/e2e/login/login.js +++ b/smoke-test/tests/cypress/cypress/e2e/login/login.js @@ -1,9 +1,9 @@ -describe('login', () => { - it('logs in', () => { - cy.visit('/'); - cy.get('input[data-testid=username]').type('datahub'); - cy.get('input[data-testid=password]').type('datahub'); - cy.contains('Sign In').click(); - cy.contains('Welcome back, ' + Cypress.env('ADMIN_DISPLAYNAME')); +describe("login", () => { + it("logs in", () => { + cy.visit("/"); + cy.get("input[data-testid=username]").type("datahub"); + cy.get("input[data-testid=password]").type("datahub"); + cy.contains("Sign In").click(); + cy.contains(`Welcome back, ${Cypress.env("ADMIN_DISPLAYNAME")}`); }); -}) +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/ml/feature_table.js b/smoke-test/tests/cypress/cypress/e2e/ml/feature_table.js index 229bc86cf3a33..9cf3ec1987da9 100644 --- a/smoke-test/tests/cypress/cypress/e2e/ml/feature_table.js +++ b/smoke-test/tests/cypress/cypress/e2e/ml/feature_table.js @@ -1,54 +1,59 @@ -describe('features', () => { - it('can visit feature tables and see features', () => { - cy.visit('/') - cy.login(); - cy.visit('/featureTables/urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,cypress-feature-table)/Features?is_lineage_mode=false'); - - // the feature table descriptions should be there - cy.contains('Yet another test feature group'); - cy.contains('this is a description from source system'); - - // additional properties are visible - cy.contains('CypressPrimaryKeyTag'); - cy.contains('CypressFeatureTag'); - - // 
navigate to sources - cy.contains('Sources').click(); - - // feature & primary key sources are visible - cy.contains('SampleCypressHdfsDataset'); - cy.contains('SampleCypressKafkaDataset'); - - // navigate to properties - cy.contains('Properties').click(); - - // custom properties are visible - cy.contains('status'); - cy.contains('Created'); - - }); - - it('can visit feature page', () => { - cy.visit('/') - cy.login(); - cy.visit('/features/urn:li:mlFeature:(cypress-test-2,some-cypress-feature-1)/Feature%20Tables?is_lineage_mode=false'); - - // Shows the parent table - cy.contains('cypress-feature-table'); - - // Has upstream & downstream lineage - cy.contains('1 upstream, 1 downstream'); - }); - - it('can visit primary key page', () => { - cy.visit('/') - cy.login(); - cy.visit('/mlPrimaryKeys/urn:li:mlPrimaryKey:(cypress-test-2,some-cypress-feature-2)/Feature%20Tables?is_lineage_mode=false'); - - // Shows the parent table - cy.contains('cypress-feature-table'); - - // Has upstream from its sources - cy.contains('1 upstream, 0 downstream'); - }); -}) +describe("features", () => { + it("can visit feature tables and see features", () => { + cy.visit("/"); + cy.login(); + cy.visit( + "/featureTables/urn:li:mlFeatureTable:(urn:li:dataPlatform:sagemaker,cypress-feature-table)/Features?is_lineage_mode=false", + ); + + // the feature table descriptions should be there + cy.contains("Yet another test feature group"); + cy.contains("this is a description from source system"); + + // additional properties are visible + cy.contains("CypressPrimaryKeyTag"); + cy.contains("CypressFeatureTag"); + + // navigate to sources + cy.contains("Sources").click(); + + // feature & primary key sources are visible + cy.contains("SampleCypressHdfsDataset"); + cy.contains("SampleCypressKafkaDataset"); + + // navigate to properties + cy.contains("Properties").click(); + + // custom properties are visible + cy.contains("status"); + cy.contains("Created"); + }); + + it("can visit feature 
page", () => { + cy.visit("/"); + cy.login(); + cy.visit( + "/features/urn:li:mlFeature:(cypress-test-2,some-cypress-feature-1)/Feature%20Tables?is_lineage_mode=false", + ); + + // Shows the parent table + cy.contains("cypress-feature-table"); + + // Has upstream & downstream lineage + cy.contains("1 upstream, 1 downstream"); + }); + + it("can visit primary key page", () => { + cy.visit("/"); + cy.login(); + cy.visit( + "/mlPrimaryKeys/urn:li:mlPrimaryKey:(cypress-test-2,some-cypress-feature-2)/Feature%20Tables?is_lineage_mode=false", + ); + + // Shows the parent table + cy.contains("cypress-feature-table"); + + // Has upstream from its sources + cy.contains("1 upstream, 0 downstream"); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/ml/model.js b/smoke-test/tests/cypress/cypress/e2e/ml/model.js index 25f98b15d9f6b..ecebaa7649f4b 100644 --- a/smoke-test/tests/cypress/cypress/e2e/ml/model.js +++ b/smoke-test/tests/cypress/cypress/e2e/ml/model.js @@ -1,30 +1,36 @@ -describe('models', () => { - it('can visit models and groups', () => { - cy.visit('/') - cy.login(); - cy.visit('/mlModels/urn:li:mlModel:(urn:li:dataPlatform:sagemaker,cypress-model,PROD)/Summary?is_lineage_mode=false'); +describe("models", () => { + it("can visit models and groups", () => { + cy.visit("/"); + cy.login(); + cy.visit( + "/mlModels/urn:li:mlModel:(urn:li:dataPlatform:sagemaker,cypress-model,PROD)/Summary?is_lineage_mode=false", + ); - cy.contains('ml model description'); + cy.contains("ml model description"); - // the model has metrics & hyper params - cy.contains('another-metric'); - cy.contains('parameter-1'); + // the model has metrics & hyper params + cy.contains("another-metric"); + cy.contains("parameter-1"); - // the model has features - cy.contains('Features').click(); - cy.contains('some-cypress-feature-1'); + // the model has features + cy.contains("Features").click(); + cy.contains("some-cypress-feature-1"); - // the model has a group - 
cy.visit('/mlModels/urn:li:mlModel:(urn:li:dataPlatform:sagemaker,cypress-model,PROD)/Group?is_lineage_mode=false'); - cy.contains('cypress-model-package-group'); - }); + // the model has a group + cy.visit( + "/mlModels/urn:li:mlModel:(urn:li:dataPlatform:sagemaker,cypress-model,PROD)/Group?is_lineage_mode=false", + ); + cy.contains("cypress-model-package-group"); + }); - it('can visit models and groups', () => { - cy.visit('/') - cy.login(); - cy.visit('/mlModelGroup/urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,cypress-model-package-group,PROD)'); - // the model group has its model - cy.contains('cypress-model'); - cy.contains('Just a model package group.'); - }); -}) + it("can visit models and groups", () => { + cy.visit("/"); + cy.login(); + cy.visit( + "/mlModelGroup/urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,cypress-model-package-group,PROD)", + ); + // the model group has its model + cy.contains("cypress-model"); + cy.contains("Just a model package group."); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/add_users.js b/smoke-test/tests/cypress/cypress/e2e/mutations/add_users.js index ba225ba37884b..411f7d574bf63 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/add_users.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/add_users.js @@ -1,7 +1,7 @@ const tryToSignUp = () => { - let number = Math.floor(Math.random() * 100000); - let name = `Example Name ${number}`; - let email = `example${number}@example.com`; + const number = Math.floor(Math.random() * 100000); + const name = `Example Name ${number}`; + const email = `example${number}@example.com`; cy.enterTextInTestId("email", email); cy.enterTextInTestId("name", name); cy.enterTextInTestId("password", "Example password"); @@ -15,7 +15,7 @@ const tryToSignUp = () => { }; describe("add_user", () => { - let registeredEmail = ""; + let registeredEmail = ""; it("go to user link and invite a user", () => { cy.login(); @@ -53,7 +53,7 @@ describe("add_user", () 
=> { cy.get('[data-testid="reset-menu-item"]').should( "have.attr", "aria-disabled", - "true" + "true", ); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_health.js b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_health.js index 2ecf8b1833b24..072574e0f57aa 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_health.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_health.js @@ -1,15 +1,18 @@ -const urn = "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_health_test,PROD)"; +const urn = + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_health_test,PROD)"; const datasetName = "cypress_health_test"; describe("dataset health test", () => { - it("go to dataset with failing assertions and active incidents and verify health of dataset", () => { - cy.login(); - cy.goToDataset(urn, datasetName); - // Ensure that the “Health” badge is present and there is an active incident warning - cy.get(`[href="/dataset/${urn}/Validation"]`).should("be.visible"); - cy.get(`[href="/dataset/${urn}/Validation"] span`).trigger("mouseover", { force: true }); - cy.waitTextVisible("This asset may be unhealthy"); - cy.waitTextVisible("Assertions 1 of 1 assertions are failing"); - cy.waitTextVisible("1 active incident"); + it("go to dataset with failing assertions and active incidents and verify health of dataset", () => { + cy.login(); + cy.goToDataset(urn, datasetName); + // Ensure that the “Health” badge is present and there is an active incident warning + cy.get(`[href="/dataset/${urn}/Validation"]`).should("be.visible"); + cy.get(`[href="/dataset/${urn}/Validation"] span`).trigger("mouseover", { + force: true, }); -}); \ No newline at end of file + cy.waitTextVisible("This asset may be unhealthy"); + cy.waitTextVisible("Assertions 1 of 1 assertions are failing"); + cy.waitTextVisible("1 active incident"); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js 
b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js index 552c3d460ade9..452e2eb3408d3 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/dataset_ownership.js @@ -5,64 +5,105 @@ const password = "Example password"; const group_name = `Test group ${test_id}`; const addOwner = (owner, type, elementId) => { - cy.clickOptionWithTestId("add-owners-button"); - cy.contains("Search for users or groups...").click({ force: true }); - cy.focused().type(owner); - cy.get('.ant-select-item').contains(owner).click(); - cy.focused().blur(); - cy.waitTextVisible(owner); - cy.get('[role="dialog"]').contains("Technical Owner").click(); - cy.get('[role="listbox"]').parent().contains(type).click(); - cy.get('[role="dialog"]').contains(type).should("be.visible"); - cy.clickOptionWithText("Done"); - cy.waitTextVisible("Owners Added"); - cy.waitTextVisible(type); - cy.waitTextVisible(owner).wait(3000); - cy.clickOptionWithText(owner); - cy.waitTextVisible("SampleCypressHiveDataset"); - cy.goToDataset("urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", "SampleCypressHiveDataset"); - cy.get(elementId).next().click(); - cy.clickOptionWithText("Yes"); - cy.waitTextVisible("Owner Removed"); - cy.ensureTextNotPresent(owner); - cy.ensureTextNotPresent(type); -} + cy.clickOptionWithTestId("add-owners-button"); + cy.contains("Search for users or groups...").click({ force: true }); + cy.focused().type(owner); + cy.get(".ant-select-item").contains(owner).click(); + cy.focused().blur(); + cy.waitTextVisible(owner); + cy.get('[role="dialog"]').contains("Technical Owner").click(); + cy.get('[role="listbox"]').parent().contains(type).click(); + cy.get('[role="dialog"]').contains(type).should("be.visible"); + cy.clickOptionWithText("Done"); + cy.waitTextVisible("Owners Added"); + cy.waitTextVisible(type); + cy.waitTextVisible(owner).wait(3000); + cy.clickOptionWithText(owner); + 
cy.waitTextVisible("SampleCypressHiveDataset"); + cy.goToDataset( + "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", + "SampleCypressHiveDataset", + ); + cy.get(elementId).next().click(); + cy.clickOptionWithText("Yes"); + cy.waitTextVisible("Owner Removed"); + cy.ensureTextNotPresent(owner); + cy.ensureTextNotPresent(type); +}; describe("add, remove ownership for dataset", () => { - beforeEach(() => { - cy.on('uncaught:exception', (err, runnable) => { return false; }); - }); + beforeEach(() => { + cy.on("uncaught:exception", (err, runnable) => false); + }); - it("create test user and test group, add user to a group", () => { - cy.loginWithCredentials(); - cy.createUser(username, password, email); - cy.createGroup(group_name, "Test group description", test_id); - cy.addGroupMember(group_name, `/group/urn:li:corpGroup:${test_id}/assets`, username); - }); + it("create test user and test group, add user to a group", () => { + cy.loginWithCredentials(); + cy.createUser(username, password, email); + cy.createGroup(group_name, "Test group description", test_id); + cy.addGroupMember( + group_name, + `/group/urn:li:corpGroup:${test_id}/assets`, + username, + ); + }); - it("open test dataset page, add and remove user ownership(test every type)", () => { - cy.loginWithCredentials(); - cy.goToDataset("urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", "SampleCypressHiveDataset"); - //business owner - addOwner(username, "Business Owner", `[href="/user/urn:li:corpuser:example${test_id}@example.com"]`); - //data steward - addOwner(username, "Data Steward", `[href="/user/urn:li:corpuser:example${test_id}@example.com"]`); - //none - addOwner(username, "None", `[href="/user/urn:li:corpuser:example${test_id}@example.com"]`); - //technical owner - addOwner(username, "Technical Owner", `[href="/user/urn:li:corpuser:example${test_id}@example.com"]`); - }); + it("open test dataset page, add and remove user ownership(test every type)", () 
=> { + cy.loginWithCredentials(); + cy.goToDataset( + "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", + "SampleCypressHiveDataset", + ); + // business owner + addOwner( + username, + "Business Owner", + `[href="/user/urn:li:corpuser:example${test_id}@example.com"]`, + ); + // data steward + addOwner( + username, + "Data Steward", + `[href="/user/urn:li:corpuser:example${test_id}@example.com"]`, + ); + // none + addOwner( + username, + "None", + `[href="/user/urn:li:corpuser:example${test_id}@example.com"]`, + ); + // technical owner + addOwner( + username, + "Technical Owner", + `[href="/user/urn:li:corpuser:example${test_id}@example.com"]`, + ); + }); - it("open test dataset page, add and remove group ownership(test every type)", () => { - cy.loginWithCredentials(); - cy.goToDataset("urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", "SampleCypressHiveDataset"); - //business owner - addOwner(group_name, "Business Owner", `[href="/group/urn:li:corpGroup:${test_id}"]`); - //data steward - addOwner(group_name, "Data Steward", `[href="/group/urn:li:corpGroup:${test_id}"]`); - //none - addOwner(group_name, "None", `[href="/group/urn:li:corpGroup:${test_id}"]`); - //technical owner - addOwner(group_name, "Technical Owner", `[href="/group/urn:li:corpGroup:${test_id}"]`); - }); -}); \ No newline at end of file + it("open test dataset page, add and remove group ownership(test every type)", () => { + cy.loginWithCredentials(); + cy.goToDataset( + "urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)", + "SampleCypressHiveDataset", + ); + // business owner + addOwner( + group_name, + "Business Owner", + `[href="/group/urn:li:corpGroup:${test_id}"]`, + ); + // data steward + addOwner( + group_name, + "Data Steward", + `[href="/group/urn:li:corpGroup:${test_id}"]`, + ); + // none + addOwner(group_name, "None", `[href="/group/urn:li:corpGroup:${test_id}"]`); + // technical owner + addOwner( + group_name, + 
"Technical Owner", + `[href="/group/urn:li:corpGroup:${test_id}"]`, + ); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js b/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js index 2fa11654a3c3e..5c1d329b9ab0b 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/deprecations.js @@ -1,29 +1,30 @@ describe("dataset deprecation", () => { - it("go to dataset and check deprecation works", () => { - const urn = "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; - const datasetName = "cypress_logging_events"; - cy.login(); - cy.goToDataset(urn, datasetName); - cy.openThreeDotDropdown(); - cy.clickOptionWithText("Mark as deprecated"); - cy.addViaFormModal("test deprecation", "Add Deprecation Details"); - cy.waitTextVisible("Deprecation Updated"); - cy.waitTextVisible("DEPRECATED") - cy.openThreeDotDropdown(); - cy.clickOptionWithText("Mark as un-deprecated"); - cy.waitTextVisible("Deprecation Updated"); - cy.ensureTextNotPresent("DEPRECATED"); - cy.openThreeDotDropdown(); - cy.clickOptionWithText("Mark as deprecated"); - cy.addViaFormModal("test deprecation", "Add Deprecation Details"); - cy.waitTextVisible("Deprecation Updated"); - cy.waitTextVisible("DEPRECATED"); - cy.contains("DEPRECATED").trigger("mouseover", { force: true }); - cy.waitTextVisible("Deprecation note"); - cy.get("[role='tooltip']").contains("Mark as un-deprecated").click(); - cy.waitTextVisible("Confirm Mark as un-deprecated"); - cy.get("button").contains("Yes").click(); - cy.waitTextVisible("Marked assets as un-deprecated!"); - cy.ensureTextNotPresent("DEPRECATED"); - }); + it("go to dataset and check deprecation works", () => { + const urn = + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"; + const datasetName = "cypress_logging_events"; + cy.login(); + cy.goToDataset(urn, datasetName); + cy.openThreeDotDropdown(); + 
cy.clickOptionWithText("Mark as deprecated"); + cy.addViaFormModal("test deprecation", "Add Deprecation Details"); + cy.waitTextVisible("Deprecation Updated"); + cy.waitTextVisible("DEPRECATED"); + cy.openThreeDotDropdown(); + cy.clickOptionWithText("Mark as un-deprecated"); + cy.waitTextVisible("Deprecation Updated"); + cy.ensureTextNotPresent("DEPRECATED"); + cy.openThreeDotDropdown(); + cy.clickOptionWithText("Mark as deprecated"); + cy.addViaFormModal("test deprecation", "Add Deprecation Details"); + cy.waitTextVisible("Deprecation Updated"); + cy.waitTextVisible("DEPRECATED"); + cy.contains("DEPRECATED").trigger("mouseover", { force: true }); + cy.waitTextVisible("Deprecation note"); + cy.get("[role='tooltip']").contains("Mark as un-deprecated").click(); + cy.waitTextVisible("Confirm Mark as un-deprecated"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Marked assets as un-deprecated!"); + cy.ensureTextNotPresent("DEPRECATED"); + }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/domains.js b/smoke-test/tests/cypress/cypress/e2e/mutations/domains.js index 3de0e9b4b893e..694c4009a77f4 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/domains.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/domains.js @@ -1,76 +1,78 @@ import { aliasQuery, hasOperationName } from "../utils"; const test_domain_id = Math.floor(Math.random() * 100000); -const test_domain = `CypressDomainTest ${test_domain_id}` -const test_domain_urn = `urn:li:domain:${test_domain_id}` - +const test_domain = `CypressDomainTest ${test_domain_id}`; +const test_domain_urn = `urn:li:domain:${test_domain_id}`; describe("add remove domain", () => { - beforeEach(() => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - aliasQuery(req, "appConfig"); - }); - }); - - const setDomainsFeatureFlag = (isOn) => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - if (hasOperationName(req, "appConfig")) { - req.reply((res) => { - 
res.body.data.appConfig.featureFlags.nestedDomainsEnabled = isOn; - }); - } + beforeEach(() => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); + }); + }); + + const setDomainsFeatureFlag = (isOn) => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + if (hasOperationName(req, "appConfig")) { + req.reply((res) => { + res.body.data.appConfig.featureFlags.nestedDomainsEnabled = isOn; }); - }; + } + }); + }; - it("create domain", () => { - cy.loginWithCredentials(); - cy.goToDomainList(); - cy.clickOptionWithText("New Domain"); - cy.waitTextVisible("Create New Domain"); - cy.get('[data-testid="create-domain-name"]').click().type(test_domain) - cy.clickOptionWithText('Advanced') - cy.get('[data-testid="create-domain-id"]').click().type(test_domain_id) - cy.get('[data-testid="create-domain-button"]').click() - cy.waitTextVisible(test_domain) - }) + it("create domain", () => { + cy.loginWithCredentials(); + cy.goToDomainList(); + cy.clickOptionWithText("New Domain"); + cy.waitTextVisible("Create New Domain"); + cy.get('[data-testid="create-domain-name"]').click().type(test_domain); + cy.clickOptionWithText("Advanced"); + cy.get('[data-testid="create-domain-id"]').click().type(test_domain_id); + cy.get('[data-testid="create-domain-button"]').click(); + cy.waitTextVisible(test_domain); + }); - it("add entities to domain", () => { - setDomainsFeatureFlag(false); - cy.loginWithCredentials(); - cy.goToDomainList(); - cy.clickOptionWithText(test_domain); - cy.waitTextVisible("Add assets") - cy.clickOptionWithText("Add assets") - cy.get(".ant-modal-content").within(() => { - cy.get('[data-testid="search-input"]').click().invoke("val", "cypress_project.jaffle_shop.").type("customer") - cy.contains("BigQuery", {timeout: 30000 }) - cy.get(".ant-checkbox-input").first().click() - cy.get("#continueButton").click() - }) - cy.waitTextVisible("Added assets to Domain!") - }) + it("add entities to domain", () => { + setDomainsFeatureFlag(false); 
+ cy.loginWithCredentials(); + cy.goToDomainList(); + cy.clickOptionWithText(test_domain); + cy.waitTextVisible("Add assets"); + cy.clickOptionWithText("Add assets"); + cy.get(".ant-modal-content").within(() => { + cy.get('[data-testid="search-input"]') + .click() + .invoke("val", "cypress_project.jaffle_shop.") + .type("customer"); + cy.contains("BigQuery", { timeout: 30000 }); + cy.get(".ant-checkbox-input").first().click(); + cy.get("#continueButton").click(); + }); + cy.waitTextVisible("Added assets to Domain!"); + }); - it("remove entity from domain", () => { - setDomainsFeatureFlag(false); - cy.loginWithCredentials(); - cy.goToDomainList(); - cy.removeDomainFromDataset( - "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", - "customers", - test_domain_urn - ) - }) + it("remove entity from domain", () => { + setDomainsFeatureFlag(false); + cy.loginWithCredentials(); + cy.goToDomainList(); + cy.removeDomainFromDataset( + "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", + "customers", + test_domain_urn, + ); + }); - it("delete a domain and ensure dangling reference is deleted on entities", () => { - setDomainsFeatureFlag(false); - cy.loginWithCredentials(); - cy.goToDomainList(); - cy.get('[data-testid="dropdown-menu-' + test_domain_urn + '"]').click(); - cy.clickOptionWithText("Delete"); - cy.clickOptionWithText("Yes"); - cy.ensureTextNotPresent(test_domain) - cy.goToContainer("urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb") - cy.waitTextVisible("customers") - cy.ensureTextNotPresent(test_domain) - }) -}); \ No newline at end of file + it("delete a domain and ensure dangling reference is deleted on entities", () => { + setDomainsFeatureFlag(false); + cy.loginWithCredentials(); + cy.goToDomainList(); + cy.get(`[data-testid="dropdown-menu-${test_domain_urn}"]`).click(); + cy.clickOptionWithText("Delete"); + cy.clickOptionWithText("Yes"); + cy.ensureTextNotPresent(test_domain); 
+ cy.goToContainer("urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb"); + cy.waitTextVisible("customers"); + cy.ensureTextNotPresent(test_domain); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js index c6d2b205250e0..4d14683ac4b13 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/edit_documentation.js @@ -5,10 +5,10 @@ const correct_url = "https://www.linkedin.com"; describe("edit documentation and link to dataset", () => { it("open test dataset page, edit documentation", () => { - //edit documentation and verify changes saved + // edit documentation and verify changes saved cy.loginWithCredentials(); cy.visit( - "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema" + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema", ); cy.openEntityTab("Documentation"); cy.waitTextVisible("my hive dataset"); @@ -19,7 +19,7 @@ describe("edit documentation and link to dataset", () => { cy.clickOptionWithTestId("description-editor-save-button"); cy.waitTextVisible("Description Updated"); cy.waitTextVisible(documentation_edited); - //return documentation to original state + // return documentation to original state cy.clickOptionWithTestId("edit-documentation-button"); cy.focused().clear().wait(1000); cy.focused().type("my hive dataset"); @@ -31,7 +31,7 @@ describe("edit documentation and link to dataset", () => { it("open test dataset page, remove and add dataset link", () => { cy.loginWithCredentials(); cy.visit( - "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema" + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema", ); cy.openEntityTab("Documentation"); cy.contains("Sample doc").trigger("mouseover", { force: true }); @@ -76,7 +76,7 
@@ describe("edit documentation and link to dataset", () => { it("edit field documentation", () => { cy.loginWithCredentials(); cy.visit( - "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema" + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema", ); cy.clickOptionWithText("field_foo"); cy.clickOptionWithTestId("edit-field-description"); @@ -96,4 +96,4 @@ describe("edit documentation and link to dataset", () => { cy.waitTextVisible("Foo field description has changed"); cy.waitTextVisible("(edited)"); }); -}); \ No newline at end of file +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js index 6c5dd77810644..8f50262b41d2c 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/ingestion_source.js @@ -1,4 +1,3 @@ - const number = Math.floor(Math.random() * 100000); const accound_id = `account${number}`; const warehouse_id = `warehouse${number}`; @@ -8,61 +7,67 @@ const role = `role${number}`; const ingestion_source_name = `ingestion source ${number}`; describe("ingestion source creation flow", () => { - it("create a ingestion source using ui, verify ingestion source details saved correctly, remove ingestion source", () => { - // Go to ingestion page, create a snowflake source - cy.loginWithCredentials(); - cy.goToIngestionPage(); - cy.clickOptionWithTestId("create-ingestion-source-button"); - cy.clickOptionWithText("Snowflake"); - cy.waitTextVisible("Snowflake Recipe"); - cy.get("#account_id").type(accound_id); - cy.get("#warehouse").type(warehouse_id); - cy.get("#username").type(username); - cy.get("#password").type(password); - cy.focused().blur(); - cy.get("#role").type(role); + it("create a ingestion source using ui, verify ingestion source details saved correctly, remove ingestion source", () => { + // Go to 
ingestion page, create a snowflake source + cy.loginWithCredentials(); + cy.goToIngestionPage(); + cy.clickOptionWithTestId("create-ingestion-source-button"); + cy.clickOptionWithText("Snowflake"); + cy.waitTextVisible("Snowflake Recipe"); + cy.get("#account_id").type(accound_id); + cy.get("#warehouse").type(warehouse_id); + cy.get("#username").type(username); + cy.get("#password").type(password); + cy.focused().blur(); + cy.get("#role").type(role); - // Verify yaml recipe is generated correctly - cy.clickOptionWithTestId("recipe-builder-yaml-button"); - cy.waitTextVisible("account_id"); - cy.waitTextVisible(accound_id); - cy.waitTextVisible(warehouse_id); - cy.waitTextVisible(username); - cy.waitTextVisible(password); - cy.waitTextVisible(role); + // Verify yaml recipe is generated correctly + cy.clickOptionWithTestId("recipe-builder-yaml-button"); + cy.waitTextVisible("account_id"); + cy.waitTextVisible(accound_id); + cy.waitTextVisible(warehouse_id); + cy.waitTextVisible(username); + cy.waitTextVisible(password); + cy.waitTextVisible(role); - // Finish creating source - cy.clickOptionWithTestId("recipe-builder-next-button"); - cy.waitTextVisible("Configure an Ingestion Schedule"); - cy.clickOptionWithTestId("ingestion-schedule-next-button"); - cy.waitTextVisible("Give this ingestion source a name."); - cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); - cy.clickOptionWithTestId("ingestion-source-save-button"); - cy.waitTextVisible("Successfully created ingestion source!").wait(5000) - cy.waitTextVisible(ingestion_source_name); - cy.get('[data-testid="ingestion-source-table-status"]').contains("Pending...").should("be.visible"); + // Finish creating source + cy.clickOptionWithTestId("recipe-builder-next-button"); + cy.waitTextVisible("Configure an Ingestion Schedule"); + cy.clickOptionWithTestId("ingestion-schedule-next-button"); + cy.waitTextVisible("Give this ingestion source a name."); + 
cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); + cy.clickOptionWithTestId("ingestion-source-save-button"); + cy.waitTextVisible("Successfully created ingestion source!").wait(5000); + cy.waitTextVisible(ingestion_source_name); + cy.get('[data-testid="ingestion-source-table-status"]') + .contains("Pending...") + .should("be.visible"); - // Verify ingestion source details are saved correctly - cy.get('[data-testid="ingestion-source-table-edit-button"]').first().click(); - cy.waitTextVisible("Edit Ingestion Source"); - cy.get("#account_id").should("have.value", accound_id); - cy.get("#warehouse").should("have.value", warehouse_id); - cy.get("#username").should("have.value", username); - cy.get("#password").should("have.value", password); - cy.get("#role").should("have.value", role); - cy.get("button").contains("Next").click(); - cy.waitTextVisible("Configure an Ingestion Schedule"); - cy.clickOptionWithTestId("ingestion-schedule-next-button"); - cy.get('[data-testid="source-name-input"]').clear().type(ingestion_source_name + " EDITED"); - cy.clickOptionWithTestId("ingestion-source-save-button"); - cy.waitTextVisible("Successfully updated ingestion source!"); - cy.waitTextVisible(ingestion_source_name + " EDITED"); + // Verify ingestion source details are saved correctly + cy.get('[data-testid="ingestion-source-table-edit-button"]') + .first() + .click(); + cy.waitTextVisible("Edit Ingestion Source"); + cy.get("#account_id").should("have.value", accound_id); + cy.get("#warehouse").should("have.value", warehouse_id); + cy.get("#username").should("have.value", username); + cy.get("#password").should("have.value", password); + cy.get("#role").should("have.value", role); + cy.get("button").contains("Next").click(); + cy.waitTextVisible("Configure an Ingestion Schedule"); + cy.clickOptionWithTestId("ingestion-schedule-next-button"); + cy.get('[data-testid="source-name-input"]') + .clear() + .type(`${ingestion_source_name} EDITED`); + 
cy.clickOptionWithTestId("ingestion-source-save-button"); + cy.waitTextVisible("Successfully updated ingestion source!"); + cy.waitTextVisible(`${ingestion_source_name} EDITED`); - // Remove ingestion source - cy.get('[data-testid="delete-button"]').first().click(); - cy.waitTextVisible("Confirm Ingestion Source Removal"); - cy.get("button").contains("Yes").click(); - cy.waitTextVisible("Removed ingestion source."); - cy.ensureTextNotPresent(ingestion_source_name + " EDITED") - }) -}); \ No newline at end of file + // Remove ingestion source + cy.get('[data-testid="delete-button"]').first().click(); + cy.waitTextVisible("Confirm Ingestion Source Removal"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Removed ingestion source."); + cy.ensureTextNotPresent(`${ingestion_source_name} EDITED`); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js index c355aaabc336a..d23b0ca7523b8 100644 --- a/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managed_ingestion.js @@ -1,39 +1,40 @@ function readyToTypeEditor() { - return cy.get('.monaco-editor textarea:first') - .click().focused(); + return cy.get(".monaco-editor textarea:first").click().focused(); } describe("run managed ingestion", () => { - it("create run managed ingestion source", () => { - let number = Math.floor(Math.random() * 100000); - let testName = `cypress test source ${number}` - let cli_version = "0.12.0"; - cy.login(); - cy.goToIngestionPage(); - cy.clickOptionWithText("Create new source"); - cy.clickOptionWithTextToScrollintoView("Other"); + it("create run managed ingestion source", () => { + const number = Math.floor(Math.random() * 100000); + const testName = `cypress test source ${number}`; + const cli_version = "0.12.0"; + cy.login(); + cy.goToIngestionPage(); + cy.clickOptionWithText("Create new 
source"); + cy.clickOptionWithTextToScrollintoView("Other"); - cy.waitTextVisible("source-type"); - readyToTypeEditor().type('{ctrl}a').clear() - readyToTypeEditor().type("source:{enter}"); - readyToTypeEditor().type(" type: demo-data"); - readyToTypeEditor().type("{enter}"); - // no space because the editor starts new line at same indentation - readyToTypeEditor().type("config: {}"); - cy.clickOptionWithText("Next") - cy.clickOptionWithText("Next") + cy.waitTextVisible("source-type"); + readyToTypeEditor().type("{ctrl}a").clear(); + readyToTypeEditor().type("source:{enter}"); + readyToTypeEditor().type(" type: demo-data"); + readyToTypeEditor().type("{enter}"); + // no space because the editor starts new line at same indentation + readyToTypeEditor().type("config: {}"); + cy.clickOptionWithText("Next"); + cy.clickOptionWithText("Next"); - cy.enterTextInTestId('source-name-input', testName) - cy.clickOptionWithText("Advanced") - cy.enterTextInTestId('cli-version-input', cli_version) - cy.clickOptionWithTextToScrollintoView("Save & Run") - cy.waitTextVisible(testName) + cy.enterTextInTestId("source-name-input", testName); + cy.clickOptionWithText("Advanced"); + cy.enterTextInTestId("cli-version-input", cli_version); + cy.clickOptionWithTextToScrollintoView("Save & Run"); + cy.waitTextVisible(testName); - cy.contains(testName).parent().within(() => { - cy.contains("Succeeded", {timeout: 180000}) - cy.clickOptionWithTestId("delete-button"); - }) - cy.clickOptionWithText("Yes") - cy.ensureTextNotPresent(testName) - }) + cy.contains(testName) + .parent() + .within(() => { + cy.contains("Succeeded", { timeout: 180000 }); + cy.clickOptionWithTestId("delete-button"); + }); + cy.clickOptionWithText("Yes"); + cy.ensureTextNotPresent(testName); + }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js index 77fd63b9cae02..57eccc3211096 100644 --- 
a/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/managing_secrets.js @@ -7,104 +7,110 @@ const role = `role${number}`; const ingestion_source_name = `ingestion source ${number}`; describe("managing secrets for ingestion creation", () => { - it("create a secret, create ingestion source using a secret, remove a secret", () => { - // Navigate to the manage ingestion page → secrets - cy.loginWithCredentials(); - cy.goToIngestionPage(); - cy.openEntityTab("Secrets"); + it("create a secret, create ingestion source using a secret, remove a secret", () => { + // Navigate to the manage ingestion page → secrets + cy.loginWithCredentials(); + cy.goToIngestionPage(); + cy.openEntityTab("Secrets"); - // Create a new secret - cy.clickOptionWithTestId("create-secret-button"); - cy.enterTextInTestId('secret-modal-name-input', `secretname${number}`); - cy.enterTextInTestId('secret-modal-value-input', `secretvalue${number}`); - cy.enterTextInTestId('secret-modal-description-input', `secretdescription${number}`); - cy.clickOptionWithTestId("secret-modal-create-button"); - cy.waitTextVisible("Successfully created Secret!"); - cy.waitTextVisible(`secretname${number}`); - cy.waitTextVisible(`secretdescription${number}`).wait(5000) + // Create a new secret + cy.clickOptionWithTestId("create-secret-button"); + cy.enterTextInTestId("secret-modal-name-input", `secretname${number}`); + cy.enterTextInTestId("secret-modal-value-input", `secretvalue${number}`); + cy.enterTextInTestId( + "secret-modal-description-input", + `secretdescription${number}`, + ); + cy.clickOptionWithTestId("secret-modal-create-button"); + cy.waitTextVisible("Successfully created Secret!"); + cy.waitTextVisible(`secretname${number}`); + cy.waitTextVisible(`secretdescription${number}`).wait(5000); - // Create an ingestion source using a secret - cy.goToIngestionPage(); - cy.get("#ingestion-create-source").click(); - 
cy.clickOptionWithText("Snowflake"); - cy.waitTextVisible("Snowflake Recipe"); - cy.get("#account_id").type(accound_id); - cy.get("#warehouse").type(warehouse_id); - cy.get("#username").type(username); - cy.get("#password").click().wait(1000); - cy.contains(`secretname${number}`).click({force: true}); - cy.focused().blur(); - cy.get("#role").type(role); - cy.get("button").contains("Next").click(); - cy.waitTextVisible("Configure an Ingestion Schedule"); - cy.get("button").contains("Next").click(); - cy.waitTextVisible("Give this ingestion source a name."); - cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); - cy.get("button").contains("Save").click(); - cy.waitTextVisible("Successfully created ingestion source!").wait(5000) - cy.waitTextVisible(ingestion_source_name); - cy.get("button").contains("Pending...").should("be.visible"); + // Create an ingestion source using a secret + cy.goToIngestionPage(); + cy.get("#ingestion-create-source").click(); + cy.clickOptionWithText("Snowflake"); + cy.waitTextVisible("Snowflake Recipe"); + cy.get("#account_id").type(accound_id); + cy.get("#warehouse").type(warehouse_id); + cy.get("#username").type(username); + cy.get("#password").click().wait(1000); + cy.contains(`secretname${number}`).click({ force: true }); + cy.focused().blur(); + cy.get("#role").type(role); + cy.get("button").contains("Next").click(); + cy.waitTextVisible("Configure an Ingestion Schedule"); + cy.get("button").contains("Next").click(); + cy.waitTextVisible("Give this ingestion source a name."); + cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); + cy.get("button").contains("Save").click(); + cy.waitTextVisible("Successfully created ingestion source!").wait(5000); + cy.waitTextVisible(ingestion_source_name); + cy.get("button").contains("Pending...").should("be.visible"); - // Remove a secret - cy.openEntityTab("Secrets"); - cy.waitTextVisible(`secretname${number}`); - 
cy.get('[data-icon="delete"]').first().click(); - cy.waitTextVisible("Confirm Secret Removal"); - cy.get("button").contains("Yes").click(); - cy.waitTextVisible("Removed secret."); - cy.ensureTextNotPresent(`secretname${number}`); - cy.ensureTextNotPresent(`secretdescription${number}`); + // Remove a secret + cy.openEntityTab("Secrets"); + cy.waitTextVisible(`secretname${number}`); + cy.get('[data-icon="delete"]').first().click(); + cy.waitTextVisible("Confirm Secret Removal"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Removed secret."); + cy.ensureTextNotPresent(`secretname${number}`); + cy.ensureTextNotPresent(`secretdescription${number}`); - // Remove ingestion source - cy.goToIngestionPage(); - cy.get('[data-testid="delete-button"]').first().click(); - cy.waitTextVisible("Confirm Ingestion Source Removal"); - cy.get("button").contains("Yes").click(); - cy.waitTextVisible("Removed ingestion source."); - cy.ensureTextNotPresent(ingestion_source_name) + // Remove ingestion source + cy.goToIngestionPage(); + cy.get('[data-testid="delete-button"]').first().click(); + cy.waitTextVisible("Confirm Ingestion Source Removal"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Removed ingestion source."); + cy.ensureTextNotPresent(ingestion_source_name); - // Verify secret is not present during ingestion source creation for password dropdown - cy.clickOptionWithText("Create new source"); - cy.clickOptionWithText("Snowflake"); - cy.waitTextVisible("Snowflake Recipe"); - cy.get("#account_id").type(accound_id); - cy.get("#warehouse").type(warehouse_id); - cy.get("#username").type(username); - cy.get("#password").click().wait(1000); - cy.ensureTextNotPresent(`secretname${number}`); + // Verify secret is not present during ingestion source creation for password dropdown + cy.clickOptionWithText("Create new source"); + cy.clickOptionWithText("Snowflake"); + cy.waitTextVisible("Snowflake Recipe"); + cy.get("#account_id").type(accound_id); 
+ cy.get("#warehouse").type(warehouse_id); + cy.get("#username").type(username); + cy.get("#password").click().wait(1000); + cy.ensureTextNotPresent(`secretname${number}`); - // Verify secret can be added during ingestion source creation and used successfully - cy.clickOptionWithText("Create Secret"); - cy.enterTextInTestId('secret-modal-name-input', `secretname${number}`) - cy.enterTextInTestId('secret-modal-value-input', `secretvalue${number}`) - cy.enterTextInTestId('secret-modal-description-input', `secretdescription${number}`) - cy.clickOptionWithTestId("secret-modal-create-button"); - cy.waitTextVisible("Created secret!"); - cy.get("#role").type(role); - cy.get("button").contains("Next").click(); - cy.waitTextVisible("Configure an Ingestion Schedule"); - cy.get("button").contains("Next").click(); - cy.waitTextVisible("Give this ingestion source a name."); - cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); - cy.get("button").contains("Save").click(); - cy.waitTextVisible("Successfully created ingestion source!").wait(5000)//prevent issue with missing form data - cy.waitTextVisible(ingestion_source_name); - cy.get("button").contains("Pending...").should("be.visible"); + // Verify secret can be added during ingestion source creation and used successfully + cy.clickOptionWithText("Create Secret"); + cy.enterTextInTestId("secret-modal-name-input", `secretname${number}`); + cy.enterTextInTestId("secret-modal-value-input", `secretvalue${number}`); + cy.enterTextInTestId( + "secret-modal-description-input", + `secretdescription${number}`, + ); + cy.clickOptionWithTestId("secret-modal-create-button"); + cy.waitTextVisible("Created secret!"); + cy.get("#role").type(role); + cy.get("button").contains("Next").click(); + cy.waitTextVisible("Configure an Ingestion Schedule"); + cy.get("button").contains("Next").click(); + cy.waitTextVisible("Give this ingestion source a name."); + 
cy.get('[data-testid="source-name-input"]').type(ingestion_source_name); + cy.get("button").contains("Save").click(); + cy.waitTextVisible("Successfully created ingestion source!").wait(5000); // prevent issue with missing form data + cy.waitTextVisible(ingestion_source_name); + cy.get("button").contains("Pending...").should("be.visible"); - //Remove ingestion source and secret - cy.goToIngestionPage(); - cy.get('[data-testid="delete-button"]').first().click(); - cy.waitTextVisible("Confirm Ingestion Source Removal"); - cy.get("button").contains("Yes").click(); - cy.waitTextVisible("Removed ingestion source."); - cy.ensureTextNotPresent(ingestion_source_name) - cy.clickOptionWithText("Secrets"); - cy.waitTextVisible(`secretname${number}`); - cy.get('[data-icon="delete"]').first().click(); - cy.waitTextVisible("Confirm Secret Removal"); - cy.get("button").contains("Yes").click(); - cy.waitTextVisible("Removed secret."); - cy.ensureTextNotPresent(`secretname${number}`); - cy.ensureTextNotPresent(`secretdescription${number}`); - }) -}); \ No newline at end of file + // Remove ingestion source and secret + cy.goToIngestionPage(); + cy.get('[data-testid="delete-button"]').first().click(); + cy.waitTextVisible("Confirm Ingestion Source Removal"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Removed ingestion source."); + cy.ensureTextNotPresent(ingestion_source_name); + cy.clickOptionWithText("Secrets"); + cy.waitTextVisible(`secretname${number}`); + cy.get('[data-icon="delete"]').first().click(); + cy.waitTextVisible("Confirm Secret Removal"); + cy.get("button").contains("Yes").click(); + cy.waitTextVisible("Removed secret."); + cy.ensureTextNotPresent(`secretname${number}`); + cy.ensureTextNotPresent(`secretdescription${number}`); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/mutations/mutations.js b/smoke-test/tests/cypress/cypress/e2e/mutations/mutations.js index e2a74a15d3dfc..cf19a34b71761 100644 --- 
a/smoke-test/tests/cypress/cypress/e2e/mutations/mutations.js +++ b/smoke-test/tests/cypress/cypress/e2e/mutations/mutations.js @@ -4,35 +4,36 @@ describe("mutations", () => { let businessAttributeEntityEnabled; beforeEach(() => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - aliasQuery(req, "appConfig"); - }); - }); - - const setBusinessAttributeFeatureFlag = () => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - if (hasOperationName(req, "appConfig")) { - req.reply((res) => { - businessAttributeEntityEnabled = res.body.data.appConfig.featureFlags.businessAttributeEntityEnabled; - return res; - }); - } - }).as('apiCall'); - }; + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); + }); + }); + + const setBusinessAttributeFeatureFlag = () => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + if (hasOperationName(req, "appConfig")) { + req.reply((res) => { + businessAttributeEntityEnabled = + res.body.data.appConfig.featureFlags.businessAttributeEntityEnabled; + return res; + }); + } + }).as("apiCall"); + }; before(() => { // warm up elastic by issuing a `*` search cy.login(); - //Commented below function, and used individual commands below with wait + // Commented below function, and used individual commands below with wait // cy.goToStarSearchList(); cy.visit("/search?query=%2A"); - cy.wait(3000) - cy.waitTextVisible("Showing") - cy.waitTextVisible("results") + cy.wait(3000); + cy.waitTextVisible("Showing"); + cy.waitTextVisible("results"); cy.wait(2000); - cy.get('body').then(($body) => { + cy.get("body").then(($body) => { if ($body.find('button[aria-label="Close"]').length > 0) { - cy.get('button[aria-label="Close"]').click(); + cy.get('button[aria-label="Close"]').click(); } }); cy.wait(2000); @@ -41,10 +42,13 @@ describe("mutations", () => { it("can create and add a tag to dataset and visit new tag page", () => { // cy.deleteUrn("urn:li:tag:CypressTestAddTag"); cy.login(); - 
cy.goToDataset("urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", "cypress_logging_events"); - cy.get('body').then(($body) => { + cy.goToDataset( + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", + "cypress_logging_events", + ); + cy.get("body").then(($body) => { if ($body.find('button[aria-label="Close"]').length > 0) { - cy.get('button[aria-label="Close"]').click(); + cy.get('button[aria-label="Close"]').click(); } }); cy.contains("Add Tag").click({ force: true }); @@ -80,13 +84,13 @@ describe("mutations", () => { // used by panel - click to search cy.wait(3000); - cy.contains("1 Datasets").click({ force: true }); + cy.contains("1 Datasets").click({ force: true }); // verify dataset shows up in search now cy.contains("of 1 result").click({ force: true }); cy.contains("cypress_logging_events").click({ force: true }); cy.get('[data-testid="tag-CypressTestAddTag"]').within(() => - cy.get("span[aria-label=close]").click() + cy.get("span[aria-label=close]").click(), ); cy.contains("Yes").click(); @@ -99,11 +103,11 @@ describe("mutations", () => { cy.addTermToDataset( "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", "cypress_logging_events", - "CypressTerm" - ) + "CypressTerm", + ); cy.get( - 'a[href="/glossaryTerm/urn:li:glossaryTerm:CypressNode.CypressTerm"]' + 'a[href="/glossaryTerm/urn:li:glossaryTerm:CypressNode.CypressTerm"]', ).within(() => cy.get("span[aria-label=close]").click()); cy.contains("Yes").click(); @@ -113,10 +117,13 @@ describe("mutations", () => { it("can add and remove tags from a dataset field", () => { cy.login(); cy.viewport(2000, 800); - cy.goToDataset("urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", "cypress_logging_events"); + cy.goToDataset( + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", + "cypress_logging_events", + ); cy.clickOptionWithText("event_name"); 
cy.get('[data-testid="schema-field-event_name-tags"]').within(() => - cy.contains("Add Tag").click() + cy.contains("Add Tag").click(), ); cy.enterTextInTestId("tag-term-modal-input", "CypressTestAddTag2"); @@ -159,7 +166,7 @@ describe("mutations", () => { cy .get("span[aria-label=close]") .trigger("mouseover", { force: true }) - .click({ force: true }) + .click({ force: true }), ); cy.contains("Yes").click({ force: true }); @@ -172,10 +179,13 @@ describe("mutations", () => { cy.login(); // make space for the glossary term column cy.viewport(2000, 800); - cy.goToDataset("urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", "cypress_logging_events"); + cy.goToDataset( + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", + "cypress_logging_events", + ); cy.clickOptionWithText("event_name"); cy.get('[data-testid="schema-field-event_name-terms"]').within(() => - cy.contains("Add Term").click({ force: true }) + cy.contains("Add Term").click({ force: true }), ); cy.selectOptionInTagTermModal("CypressTerm"); @@ -186,7 +196,7 @@ describe("mutations", () => { cy .get("span[aria-label=close]") .trigger("mouseover", { force: true }) - .click({ force: true }) + .click({ force: true }), ); cy.contains("Yes").click({ force: true }); @@ -195,39 +205,42 @@ describe("mutations", () => { it("can add and remove business attribute from a dataset field", () => { setBusinessAttributeFeatureFlag(); - cy.login(); - // make space for the glossary term column - cy.viewport(2000, 800); - cy.visit("/dataset/" + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)"); - cy.wait('@apiCall').then(() => { - if (!businessAttributeEntityEnabled) { - return; - } - cy.wait(5000); - cy.waitTextVisible("cypress_logging_events"); - cy.clickOptionWithText("event_data"); - cy.get('[data-testid="schema-field-event_data-businessAttribute"]').trigger( - "mouseover", - { force: true } - ); - 
cy.get('[data-testid="schema-field-event_data-businessAttribute"]').within(() => - cy.contains("Add Attribute").click({ force: true }) - ); - - cy.selectOptionInAttributeModal("cypressTestAttribute"); - cy.wait(2000); - cy.contains("cypressTestAttribute"); - - cy.get('[data-testid="schema-field-event_data-businessAttribute"]'). - within(() => - cy - .get("span[aria-label=close]") - .trigger("mouseover", { force: true }) - .click({ force: true }) - ); - cy.contains("Yes").click({ force: true }); - - cy.contains("cypressTestAttribute").should("not.exist"); - }); - }); + cy.login(); + // make space for the glossary term column + cy.viewport(2000, 800); + cy.visit( + "/dataset/" + + "urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", + ); + cy.wait("@apiCall").then(() => { + if (!businessAttributeEntityEnabled) { + return; + } + cy.wait(5000); + cy.waitTextVisible("cypress_logging_events"); + cy.clickOptionWithText("event_data"); + cy.get( + '[data-testid="schema-field-event_data-businessAttribute"]', + ).trigger("mouseover", { force: true }); + cy.get( + '[data-testid="schema-field-event_data-businessAttribute"]', + ).within(() => cy.contains("Add Attribute").click({ force: true })); + + cy.selectOptionInAttributeModal("cypressTestAttribute"); + cy.wait(2000); + cy.contains("cypressTestAttribute"); + + cy.get( + '[data-testid="schema-field-event_data-businessAttribute"]', + ).within(() => + cy + .get("span[aria-label=close]") + .trigger("mouseover", { force: true }) + .click({ force: true }), + ); + cy.contains("Yes").click({ force: true }); + + cy.contains("cypressTestAttribute").should("not.exist"); + }); + }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/operations/operations.js b/smoke-test/tests/cypress/cypress/e2e/operations/operations.js index 8b95656bfe8ba..4d416380c164d 100644 --- a/smoke-test/tests/cypress/cypress/e2e/operations/operations.js +++ b/smoke-test/tests/cypress/cypress/e2e/operations/operations.js @@ -1,11 +1,12 @@ 
-describe('operations', () => { - it('can visit dataset with operation aspect and verify last updated is present', () => { - cy.login(); - cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:bigquery,test-project.bigquery_usage_logs.cypress_logging_events,PROD)/Stats?is_lineage_mode=false'); - cy.contains('test-project.bigquery_usage_logs.cypress_logging_events'); - - // Last updated text is present - cy.contains('Last Updated') - }); - }) - \ No newline at end of file +describe("operations", () => { + it("can visit dataset with operation aspect and verify last updated is present", () => { + cy.login(); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:bigquery,test-project.bigquery_usage_logs.cypress_logging_events,PROD)/Stats?is_lineage_mode=false", + ); + cy.contains("test-project.bigquery_usage_logs.cypress_logging_events"); + + // Last updated text is present + cy.contains("Last Updated"); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/ownership/manage_ownership.js b/smoke-test/tests/cypress/cypress/e2e/ownership/manage_ownership.js index 6d34b7604b74a..1c10c4a8edcea 100644 --- a/smoke-test/tests/cypress/cypress/e2e/ownership/manage_ownership.js +++ b/smoke-test/tests/cypress/cypress/e2e/ownership/manage_ownership.js @@ -8,32 +8,32 @@ describe("manage ownership", () => { cy.clickOptionWithText("Create new Ownership Type"); cy.get('[data-testid="ownership-type-name-input"]').clear("T"); cy.get('[data-testid="ownership-type-name-input"]').type( - "Test Ownership Type" + "Test Ownership Type", ); cy.get('[data-testid="ownership-type-description-input"]').clear("T"); cy.get('[data-testid="ownership-type-description-input"]').type( - "This is a test ownership type description." 
+ "This is a test ownership type description.", ); cy.get('[data-testid="ownership-builder-save"]').click(); cy.wait(3000); cy.waitTextVisible("Test Ownership Type"); cy.get( - '[data-row-key="Test Ownership Type"] > :nth-child(3) > .anticon > svg' + '[data-row-key="Test Ownership Type"] > :nth-child(3) > .anticon > svg', ).click(); cy.clickOptionWithText("Edit"); cy.get('[data-testid="ownership-type-description-input"]').clear( - "This is an test ownership type description." + "This is an test ownership type description.", ); cy.get('[data-testid="ownership-type-description-input"]').type( - "This is an edited test ownership type description." + "This is an edited test ownership type description.", ); cy.get('[data-testid="ownership-builder-save"] > span').click(); cy.wait(3000); cy.waitTextVisible("This is an edited test ownership type description."); cy.get( - '[data-row-key="Test Ownership Type"] > :nth-child(3) > .anticon > svg' + '[data-row-key="Test Ownership Type"] > :nth-child(3) > .anticon > svg', ).click(); cy.clickOptionWithText("Delete"); cy.get(".ant-popover-buttons > .ant-btn-primary").click(); diff --git a/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js b/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js index 015ce8c058eb8..f03aa6afda4a2 100644 --- a/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js +++ b/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js @@ -1,71 +1,82 @@ -const DATASET_URN = 'urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)'; -const runId = Date.now() +const DATASET_URN = + "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD)"; +const runId = Date.now(); const addNewQuery = () => { cy.get('[data-testid="add-query-button"]').click(); - cy.get('[data-mode-id="sql"]').click() - .type(` + Test Query-${runId}`); - cy.get('[data-testid="query-builder-title-input"]').click() - .type(`Test Table-${runId}`); - cy.get('.ProseMirror').click() - .type(`Test 
Description-${runId}`); + cy.get('[data-mode-id="sql"]').click().type(` + Test Query-${runId}`); + cy.get('[data-testid="query-builder-title-input"]') + .click() + .type(`Test Table-${runId}`); + cy.get(".ProseMirror").click().type(`Test Description-${runId}`); cy.get('[data-testid="query-builder-save-button"]').click(); cy.waitTextVisible("Created Query!"); -} +}; const editQuery = () => { - cy.get('[data-testid="query-edit-button-0"]').click() - cy.get('[data-mode-id="sql"]').click() - .type(` + Edited Query-${runId}`); - cy.get('[data-testid="query-builder-title-input"]').click().clear() - .type(`Edited Table-${runId}`); - cy.get('.ProseMirror').click().clear() - .type(`Edited Description-${runId}`); + cy.get('[data-testid="query-edit-button-0"]').click(); + cy.get('[data-mode-id="sql"]').click().type(` + Edited Query-${runId}`); + cy.get('[data-testid="query-builder-title-input"]') + .click() + .clear() + .type(`Edited Table-${runId}`); + cy.get(".ProseMirror").click().clear().type(`Edited Description-${runId}`); cy.get('[data-testid="query-builder-save-button"]').click(); cy.waitTextVisible("Edited Query!"); - } +}; - const deleteQuery = () => { - cy.get('[data-testid="query-more-button-0"]').click(); - cy.clickOptionWithText("Delete"); - cy.clickOptionWithText('Yes') - cy.waitTextVisible("Deleted Query!"); - } +const deleteQuery = () => { + cy.get('[data-testid="query-more-button-0"]').click(); + cy.clickOptionWithText("Delete"); + cy.clickOptionWithText("Yes"); + cy.waitTextVisible("Deleted Query!"); +}; - const verifyViewCardDetails = (query,title,description) => { - cy.get('[data-testid="query-content-0"]').scrollIntoView().should('be.visible').click() - cy.get('.ant-modal-content').waitTextVisible(query); - cy.get('.ant-modal-content').waitTextVisible(title); - cy.get('.ant-modal-content').waitTextVisible(description); +const verifyViewCardDetails = (query, title, description) => { + cy.get('[data-testid="query-content-0"]') + .scrollIntoView() + 
.should("be.visible") + .click(); + cy.get(".ant-modal-content").waitTextVisible(query); + cy.get(".ant-modal-content").waitTextVisible(title); + cy.get(".ant-modal-content").waitTextVisible(description); }; describe("manage queries", () => { beforeEach(() => { cy.loginWithCredentials(); - cy.goToDataset(DATASET_URN,"SampleCypressHdfsDataset"); - cy.openEntityTab("Queries") - }) - + cy.goToDataset(DATASET_URN, "SampleCypressHdfsDataset"); + cy.openEntityTab("Queries"); + }); + it("go to queries tab on dataset page then create query and verify & view the card", () => { - cy.waitTextVisible("Highlighted Queries"); - cy.ensureTextNotPresent("Recent Queries"); - addNewQuery(); - cy.waitTextVisible(`+ Test Query-${runId}`); - cy.waitTextVisible(`Test Table-${runId}`); - cy.waitTextVisible(`Test Description-${runId}`); - cy.waitTextVisible("Created on"); - verifyViewCardDetails(`+ Test Query-${runId}`,`Test Table-${runId}`,`Test Description-${runId}`) + cy.waitTextVisible("Highlighted Queries"); + cy.ensureTextNotPresent("Recent Queries"); + addNewQuery(); + cy.waitTextVisible(`+ Test Query-${runId}`); + cy.waitTextVisible(`Test Table-${runId}`); + cy.waitTextVisible(`Test Description-${runId}`); + cy.waitTextVisible("Created on"); + verifyViewCardDetails( + `+ Test Query-${runId}`, + `Test Table-${runId}`, + `Test Description-${runId}`, + ); }); - it("go to queries tab on dataset page then edit the query and verify edited Query card", () => { - editQuery(); - verifyViewCardDetails(`+ Test Query-${runId} + Edited Query-${runId}`,`Edited Table-${runId}`,`Edited Description-${runId}`) - }); + it("go to queries tab on dataset page then edit the query and verify edited Query card", () => { + editQuery(); + verifyViewCardDetails( + `+ Test Query-${runId} + Edited Query-${runId}`, + `Edited Table-${runId}`, + `Edited Description-${runId}`, + ); + }); - it("go to queries tab on dataset page then delete the query and verify that query should be gone", () => { - deleteQuery(); - 
cy.ensureTextNotPresent(`+ Test Query-${runId} + Edited Query-${runId}`); - cy.ensureTextNotPresent(`Edited Table-${runId}`); - cy.ensureTextNotPresent(`Edited Description-${runId}`); - }); -}); \ No newline at end of file + it("go to queries tab on dataset page then delete the query and verify that query should be gone", () => { + deleteQuery(); + cy.ensureTextNotPresent(`+ Test Query-${runId} + Edited Query-${runId}`); + cy.ensureTextNotPresent(`Edited Table-${runId}`); + cy.ensureTextNotPresent(`Edited Description-${runId}`); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/schema_blame/schema_blame.js b/smoke-test/tests/cypress/cypress/e2e/schema_blame/schema_blame.js index 2218cbd95cf9d..4d5d4f324e0aa 100644 --- a/smoke-test/tests/cypress/cypress/e2e/schema_blame/schema_blame.js +++ b/smoke-test/tests/cypress/cypress/e2e/schema_blame/schema_blame.js @@ -1,55 +1,105 @@ -describe('schema blame', () => { - Cypress.on('uncaught:exception', (err, runnable) => { - return false; - }); - - it('can activate the blame view and verify for the latest version of a dataset', () => { - cy.login(); - cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema'); - cy.wait(10000); - - // Verify which fields are present, along with checking descriptions and tags - cy.contains('field_foo'); - cy.contains('field_baz'); - cy.contains('field_bar').should('not.exist'); - cy.clickOptionWithText("field_foo"); - cy.contains('Foo field description has changed'); - cy.contains('Baz field description'); - cy.get('[data-testid="schema-field-field_foo-tags"]').contains('Legacy'); - - // Make sure the schema blame is accurate - cy.get('[data-testid="schema-blame-button"]').click({ force: true }); - cy.wait(3000); - - cy.get('[data-testid="field_foo-schema-blame-description"]').contains("Modified in v1.0.0"); - cy.get('[data-testid="field_baz-schema-blame-description"]').contains("Added in v1.0.0"); - - // Verify the "view blame prior to" 
button changes state by modifying the URL - cy.get('[data-testid="field_foo-view-prior-blame-button"]').click({force: true}); - cy.wait(3000); - - cy.url().should('include', 'semantic_version=1.0.0'); +describe("schema blame", () => { + Cypress.on("uncaught:exception", (err, runnable) => false); + + it("can activate the blame view and verify for the latest version of a dataset", () => { + cy.login(); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema", + ); + cy.wait(10000); + + // Verify which fields are present, along with checking descriptions and tags + cy.contains("field_foo"); + cy.contains("field_baz"); + cy.contains("field_bar").should("not.exist"); + cy.clickOptionWithText("field_foo"); + cy.contains("Foo field description has changed"); + cy.contains("Baz field description"); + cy.get('[data-testid="schema-field-field_foo-tags"]').contains("Legacy"); + + // Make sure the schema blame is accurate + cy.get('[data-testid="schema-blame-button"]').click({ force: true }); + cy.wait(3000); + + cy.get('[data-testid="field_foo-schema-blame-description"]').contains( + "Modified in v1.0.0", + ); + cy.get('[data-testid="field_baz-schema-blame-description"]').contains( + "Added in v1.0.0", + ); + + // Verify the "view blame prior to" button changes state by modifying the URL + cy.get('[data-testid="field_foo-view-prior-blame-button"]').click({ + force: true, + }); + cy.wait(3000); + + cy.url().should("include", "semantic_version=1.0.0"); + }); + + it("can activate the blame view and verify for an older version of a dataset", () => { + cy.login(); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema", + ); + cy.wait(10000); + + // Verify which fields are present, along with checking descriptions and tags + cy.contains("field_foo"); + cy.contains("field_baz"); + cy.contains("field_bar").should("not.exist"); + cy.contains("Foo field description has changed"); + 
cy.contains("Baz field description"); + cy.clickOptionWithText("field_foo"); + cy.get('[data-testid="schema-field-field_foo-tags"]').contains("Legacy"); + + // Make sure the schema blame is accurate + cy.get('[data-testid="schema-blame-button"]').click({ force: true }); + cy.wait(3000); + + cy.get('[data-testid="field_foo-schema-blame-description"]').contains( + "Modified in v1.0.0", + ); + cy.get('[data-testid="field_baz-schema-blame-description"]').contains( + "Added in v1.0.0", + ); + + // Verify the "view blame prior to" button changes state by modifying the URL + cy.get('[data-testid="field_foo-view-prior-blame-button"]').click({ + force: true, + }); + cy.wait(3000); + + cy.url().should("include", "semantic_version=1.0.0"); }); - it('can activate the blame view and verify for an older version of a dataset', () => { + it("can activate the blame view and verify for an older version of a dataset", () => { cy.login(); - cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema?semantic_version=0.0.0'); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,SampleCypressHiveDataset,PROD)/Schema?semantic_version=0.0.0", + ); cy.wait(10000); - // Verify which fields are present, along with checking descriptions and tags - cy.contains('field_foo'); - cy.contains('field_bar'); - cy.contains('field_baz').should('not.exist'); - cy.contains('Foo field description'); - cy.contains('Bar field description'); + // Verify which fields are present, along with checking descriptions and tags + cy.contains("field_foo"); + cy.contains("field_bar"); + cy.contains("field_baz").should("not.exist"); + cy.contains("Foo field description"); + cy.contains("Bar field description"); cy.clickOptionWithText("field_foo"); - cy.get('[data-testid="schema-field-field_foo-tags"]').contains('Legacy').should('not.exist'); + cy.get('[data-testid="schema-field-field_foo-tags"]') + .contains("Legacy") + .should("not.exist"); // Make sure the schema 
blame is accurate cy.get('[data-testid="schema-blame-button"]').click({ force: true }); cy.wait(3000); - cy.get('[data-testid="field_foo-schema-blame-description"]').contains("Added in v0.0.0"); - cy.get('[data-testid="field_bar-schema-blame-description"]').contains("Added in v0.0.0"); + cy.get('[data-testid="field_foo-schema-blame-description"]').contains( + "Added in v0.0.0", + ); + cy.get('[data-testid="field_bar-schema-blame-description"]').contains( + "Added in v0.0.0", + ); + }); }); -}) \ No newline at end of file diff --git a/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js b/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js index 4306ecfbcec28..32e61d37f7562 100644 --- a/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js +++ b/smoke-test/tests/cypress/cypress/e2e/search/query_and_filter_search.js @@ -9,81 +9,79 @@ const selectFilteredEntity = (textToClick, entity, url) => { cy.get("[data-testid=update-filters]").click({ force: true }); cy.url().should("include", `${url}`); cy.get("[data-testid=update-filters]").should("not.be.visible"); - cy.get('.ant-pagination-next').scrollIntoView().should('be.visible'); + cy.get(".ant-pagination-next").scrollIntoView().should("be.visible"); }; const verifyFilteredEntity = (text) => { - cy.get('.ant-typography').contains(text).should('be.visible'); + cy.get(".ant-typography").contains(text).should("be.visible"); }; const clickAndVerifyEntity = (entity) => { - cy.get('[class*="entityUrn-urn"]').first() - .find('a[href*="urn:li"] span[class^="ant-typography"]').last().invoke('text') + cy.get('[class*="entityUrn-urn"]') + .first() + .find('a[href*="urn:li"] span[class^="ant-typography"]') + .last() + .invoke("text") .then((text) => { cy.contains(text).click(); verifyFilteredEntity(text); verifyFilteredEntity(entity); }); - } +}; describe("auto-complete dropdown, filter plus query search test", () => { - beforeEach(() => { - cy.loginWithCredentials(); - 
cy.visit('/'); + cy.loginWithCredentials(); + cy.visit("/"); }); - - it("Verify the 'filter by type' section + query", () => { - //Dashboard + it("Verify the 'filter by type' section + query", () => { + // Dashboard searchToExecute("*"); selectFilteredEntity("Type", "Dashboards", "filter__entityType"); - clickAndVerifyEntity('Dashboard') + clickAndVerifyEntity("Dashboard"); - //Ml Models + // Ml Models searchToExecute("*"); selectFilteredEntity("Type", "ML Models", "filter__entityType"); - clickAndVerifyEntity('ML Model'); + clickAndVerifyEntity("ML Model"); - //Piplines + // Piplines searchToExecute("*"); selectFilteredEntity("Type", "Pipelines", "filter__entityType"); - clickAndVerifyEntity('Pipeline'); + clickAndVerifyEntity("Pipeline"); }); it("Verify the 'filter by Glossary term' section + query", () => { - - //Glossary Term - searchToExecute("*"); - selectFilteredEntity("Type", "Glossary Terms", "filter__entityType"); - clickAndVerifyEntity('Glossary Term') -}); + // Glossary Term + searchToExecute("*"); + selectFilteredEntity("Type", "Glossary Terms", "filter__entityType"); + clickAndVerifyEntity("Glossary Term"); + }); it("Verify the 'filter by platform' section + query", () => { - - //Hive + // Hive searchToExecute("*"); selectFilteredEntity("Platform", "Hive", "filter_platform"); - clickAndVerifyEntity('Hive') - - //HDFS + clickAndVerifyEntity("Hive"); + + // HDFS searchToExecute("*"); selectFilteredEntity("Platform", "HDFS", "filter_platform"); - clickAndVerifyEntity('HDFS') + clickAndVerifyEntity("HDFS"); - //Airflow + // Airflow searchToExecute("*"); selectFilteredEntity("Platform", "Airflow", "filter_platform"); - clickAndVerifyEntity('Airflow') + clickAndVerifyEntity("Airflow"); }); it("Verify the 'filter by tag' section + query", () => { - - //CypressFeatureTag + // CypressFeatureTag searchToExecute("*"); selectFilteredEntity("Tag", "CypressFeatureTag", "filter_tags"); - clickAndVerifyEntity('Tags') + clickAndVerifyEntity("Tags"); 
cy.mouseover('[data-testid="tag-CypressFeatureTag"]'); - verifyFilteredEntity('CypressFeatureTag'); + verifyFilteredEntity("CypressFeatureTag"); }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/search/search.js b/smoke-test/tests/cypress/cypress/e2e/search/search.js index bc11dbee5142c..ea46b2d8d012b 100644 --- a/smoke-test/tests/cypress/cypress/e2e/search/search.js +++ b/smoke-test/tests/cypress/cypress/e2e/search/search.js @@ -13,7 +13,7 @@ describe("search", () => { cy.visit("/"); // random string that is unlikely to accidentally have a match cy.get("input[data-testid=search-input]").type( - "zzzzzzzzzzzzzqqqqqqqqqqqqqzzzzzzqzqzqzqzq{enter}" + "zzzzzzzzzzzzzqqqqqqqqqqqqqzzzzzzqzqzqzqzq{enter}", ); cy.wait(5000); cy.contains("of 0 results"); @@ -22,7 +22,7 @@ describe("search", () => { it("can search, find a result, and visit the dataset page", () => { cy.login(); cy.visit( - "/search?filter_entity=DATASET&filter_tags=urn%3Ali%3Atag%3ACypress&page=1&query=users_created" + "/search?filter_entity=DATASET&filter_tags=urn%3Ali%3Atag%3ACypress&page=1&query=users_created", ); cy.contains("of 2 result"); @@ -51,7 +51,7 @@ describe("search", () => { it("can search and get glossary term facets with proper labels", () => { cy.login(); cy.visit( - "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)" + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)", ); cy.contains("cypress_logging_events"); diff --git a/smoke-test/tests/cypress/cypress/e2e/search/searchFilters.js b/smoke-test/tests/cypress/cypress/e2e/search/searchFilters.js index f86418879ed04..761901c0c1175 100644 --- a/smoke-test/tests/cypress/cypress/e2e/search/searchFilters.js +++ b/smoke-test/tests/cypress/cypress/e2e/search/searchFilters.js @@ -54,7 +54,7 @@ describe("search", () => { cy.get("[data-testid=update-filters").click({ force: true }); cy.url().should( "include", - "filter_tags___false___EQUAL___0=urn%3Ali%3Atag%3ACypress" + 
"filter_tags___false___EQUAL___0=urn%3Ali%3Atag%3ACypress", ); cy.get("[data-testid=update-filters").should("not.exist"); @@ -64,7 +64,7 @@ describe("search", () => { cy.get("[data-testid=update-filters").click({ force: true }); cy.url().should( "include", - "filter__entityType%E2%90%9EtypeNames___false___EQUAL___1=DATASET" + "filter__entityType%E2%90%9EtypeNames___false___EQUAL___1=DATASET", ); // ensure expected entity is in search results @@ -78,7 +78,7 @@ describe("search", () => { cy.get("[data-testid=remove-filter-Datasets").click({ force: true }); cy.url().should( "not.include", - "filter__entityType%E2%90%9EtypeNames___false___EQUAL___1=DATASET" + "filter__entityType%E2%90%9EtypeNames___false___EQUAL___1=DATASET", ); cy.get("[data-testid=active-filter-Datasets").should("not.exist"); @@ -86,7 +86,7 @@ describe("search", () => { cy.get("[data-testid=clear-all-filters").click({ force: true }); cy.url().should( "not.include", - "filter_tags___false___EQUAL___0=urn%3Ali%3Atag%3ACypress" + "filter_tags___false___EQUAL___0=urn%3Ali%3Atag%3ACypress", ); cy.get("[data-testid=active-filter-Cypress").should("not.exist"); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/homePagePost.js b/smoke-test/tests/cypress/cypress/e2e/settings/homePagePost.js index 843a15d7430af..f9f76331ee7d7 100644 --- a/smoke-test/tests/cypress/cypress/e2e/settings/homePagePost.js +++ b/smoke-test/tests/cypress/cypress/e2e/settings/homePagePost.js @@ -1,85 +1,106 @@ const homePageRedirection = () => { - cy.visit('/'); - cy.waitTextPresent("Welcome back"); + cy.visit("/"); + cy.waitTextPresent("Welcome back"); }; const addOrEditAnnouncement = (text, title, description, testId) => { - cy.waitTextPresent(text); - cy.get('[data-testid="create-post-title"]').clear().type(title); - cy.get('[id="description"]').clear().type(description); - cy.get(`[data-testid="${testId}-post-button"]`).click({ force: true }); - cy.reload(); - homePageRedirection(); + cy.waitTextPresent(text); + 
cy.get('[data-testid="create-post-title"]').clear().type(title); + cy.get('[id="description"]').clear().type(description); + cy.get(`[data-testid="${testId}-post-button"]`).click({ force: true }); + cy.reload(); + homePageRedirection(); }; const addOrEditLink = (text, title, url, imagesURL, testId) => { - cy.waitTextPresent(text); - cy.get('[data-testid="create-post-title"]').clear().type(title); - cy.get('[data-testid="create-post-link"]').clear().type(url); - cy.get('[data-testid="create-post-media-location"]').clear().type(imagesURL); - cy.get(`[data-testid="${testId}-post-button"]`).click({ force: true }); - cy.reload(); - homePageRedirection(); + cy.waitTextPresent(text); + cy.get('[data-testid="create-post-title"]').clear().type(title); + cy.get('[data-testid="create-post-link"]').clear().type(url); + cy.get('[data-testid="create-post-media-location"]').clear().type(imagesURL); + cy.get(`[data-testid="${testId}-post-button"]`).click({ force: true }); + cy.reload(); + homePageRedirection(); }; -const clickOnNewPost = () =>{ - cy.get('[id="posts-create-post"]').click({ force: true }); -} +const clickOnNewPost = () => { + cy.get('[id="posts-create-post"]').click({ force: true }); +}; const clickOnMoreOption = () => { - cy.get('[aria-label="more"]').first().click(); -} + cy.get('[aria-label="more"]').first().click(); +}; describe("create announcement and link post", () => { - beforeEach(() => { - cy.loginWithCredentials(); - cy.goToHomePagePostSettings(); - }); + beforeEach(() => { + cy.loginWithCredentials(); + cy.goToHomePagePostSettings(); + }); - it("create announcement post and verify", () => { - clickOnNewPost() - addOrEditAnnouncement("Create new Post", "Test Announcement Title", "Add Description to post announcement", "create"); - cy.waitTextPresent("Test Announcement Title"); - }); + it("create announcement post and verify", () => { + clickOnNewPost(); + addOrEditAnnouncement( + "Create new Post", + "Test Announcement Title", + "Add Description to post 
announcement", + "create", + ); + cy.waitTextPresent("Test Announcement Title"); + }); - it("edit announced post and verify", () => { - clickOnMoreOption() - cy.clickOptionWithText("Edit"); - addOrEditAnnouncement("Edit Post", "Test Announcement Title Edited", "Decription Edited", "update"); - cy.waitTextPresent("Test Announcement Title Edited"); - }); + it("edit announced post and verify", () => { + clickOnMoreOption(); + cy.clickOptionWithText("Edit"); + addOrEditAnnouncement( + "Edit Post", + "Test Announcement Title Edited", + "Decription Edited", + "update", + ); + cy.waitTextPresent("Test Announcement Title Edited"); + }); - it("delete announced post and verify", () => { - clickOnMoreOption() - cy.clickOptionWithText("Delete"); - cy.clickOptionWithText("Yes"); - cy.reload(); - homePageRedirection(); - cy.ensureTextNotPresent("Test Announcement Title Edited"); - }); + it("delete announced post and verify", () => { + clickOnMoreOption(); + cy.clickOptionWithText("Delete"); + cy.clickOptionWithText("Yes"); + cy.reload(); + homePageRedirection(); + cy.ensureTextNotPresent("Test Announcement Title Edited"); + }); - it("create link post and verify", () => { - clickOnNewPost() - cy.waitTextPresent('Create new Post'); - cy.contains('label', 'Link').click(); - addOrEditLink("Create new Post", "Test Link Title", 'https://www.example.com', 'https://www.example.com/images/example-image.jpg', "create"); - cy.waitTextPresent("Test Link Title"); - }); + it("create link post and verify", () => { + clickOnNewPost(); + cy.waitTextPresent("Create new Post"); + cy.contains("label", "Link").click(); + addOrEditLink( + "Create new Post", + "Test Link Title", + "https://www.example.com", + "https://www.example.com/images/example-image.jpg", + "create", + ); + cy.waitTextPresent("Test Link Title"); + }); - it("edit linked post and verify", () => { - clickOnMoreOption() - cy.clickOptionWithText("Edit"); - addOrEditLink("Edit Post", "Test Link Edited Title", 
'https://www.updatedexample.com', 'https://www.updatedexample.com/images/example-image.jpg', "update"); - cy.waitTextPresent("Test Link Edited Title"); - }); + it("edit linked post and verify", () => { + clickOnMoreOption(); + cy.clickOptionWithText("Edit"); + addOrEditLink( + "Edit Post", + "Test Link Edited Title", + "https://www.updatedexample.com", + "https://www.updatedexample.com/images/example-image.jpg", + "update", + ); + cy.waitTextPresent("Test Link Edited Title"); + }); - it("delete linked post and verify", () => { - clickOnMoreOption() - cy.clickOptionWithText("Delete"); - cy.clickOptionWithText("Yes"); - cy.reload(); - homePageRedirection(); - cy.ensureTextNotPresent("Test Link Edited Title"); - }); + it("delete linked post and verify", () => { + clickOnMoreOption(); + cy.clickOptionWithText("Delete"); + cy.clickOptionWithText("Yes"); + cy.reload(); + homePageRedirection(); + cy.ensureTextNotPresent("Test Link Edited Title"); + }); }); - diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js b/smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js index 7a77c2b77df5b..61ed8417cd450 100644 --- a/smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js +++ b/smoke-test/tests/cypress/cypress/e2e/settings/manage_access_tokens.js @@ -1,43 +1,49 @@ import { aliasQuery, hasOperationName } from "../utils"; + const test_id = Math.floor(Math.random() * 100000); describe("manage access tokens", () => { - before(() => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - aliasQuery(req, "appConfig"); - }); + before(() => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + aliasQuery(req, "appConfig"); }); - - const setTokenAuthEnabledFlag = (isOn) => { - cy.intercept("POST", "/api/v2/graphql", (req) => { - if (hasOperationName(req, "appConfig")) { - req.reply((res) => { - res.body.data.appConfig.authConfig.tokenAuthEnabled = isOn; - }); - } - }); - }; + }); - it("create and revoke access token", () 
=> { - //create access token, verify token on ui - setTokenAuthEnabledFlag(true); - cy.loginWithCredentials(); - cy.goToAccessTokenSettings(); - cy.clickOptionWithTestId("add-token-button"); - cy.enterTextInTestId("create-access-token-name", "Token Name" + test_id); - cy.enterTextInTestId("create-access-token-description", "Token Description" + test_id); - cy.clickOptionWithTestId("create-access-token-button"); - cy.waitTextVisible("New Personal Access Token"); - cy.get('[data-testid="access-token-value"]').should("be.visible"); - cy.get('[data-testid="access-token-value"]').invoke('text').should('match', /^[a-zA-Z0-9-_]+\.[a-zA-Z0-9-_]+\.[a-zA-Z0-9-_]+$/); - cy.clickOptionWithTestId("access-token-modal-close-button"); - //revoke access token, verify token removed from ui - cy.waitTextVisible("Token Name" + test_id); - cy.waitTextVisible("Token Description" + test_id); - cy.clickOptionWithTestId("revoke-token-button"); - cy.waitTextVisible("Are you sure you want to revoke this token?"); - cy.clickOptionWithText("Yes"); - cy.ensureTextNotPresent("Token Name" + test_id); - cy.ensureTextNotPresent("Token Description" + test_id); + const setTokenAuthEnabledFlag = (isOn) => { + cy.intercept("POST", "/api/v2/graphql", (req) => { + if (hasOperationName(req, "appConfig")) { + req.reply((res) => { + res.body.data.appConfig.authConfig.tokenAuthEnabled = isOn; + }); + } }); -}); \ No newline at end of file + }; + + it("create and revoke access token", () => { + // create access token, verify token on ui + setTokenAuthEnabledFlag(true); + cy.loginWithCredentials(); + cy.goToAccessTokenSettings(); + cy.clickOptionWithTestId("add-token-button"); + cy.enterTextInTestId("create-access-token-name", `Token Name${test_id}`); + cy.enterTextInTestId( + "create-access-token-description", + `Token Description${test_id}`, + ); + cy.clickOptionWithTestId("create-access-token-button"); + cy.waitTextVisible("New Personal Access Token"); + 
cy.get('[data-testid="access-token-value"]').should("be.visible"); + cy.get('[data-testid="access-token-value"]') + .invoke("text") + .should("match", /^[a-zA-Z0-9-_]+\.[a-zA-Z0-9-_]+\.[a-zA-Z0-9-_]+$/); + cy.clickOptionWithTestId("access-token-modal-close-button"); + // revoke access token, verify token removed from ui + cy.waitTextVisible(`Token Name${test_id}`); + cy.waitTextVisible(`Token Description${test_id}`); + cy.clickOptionWithTestId("revoke-token-button"); + cy.waitTextVisible("Are you sure you want to revoke this token?"); + cy.clickOptionWithText("Yes"); + cy.ensureTextNotPresent(`Token Name${test_id}`); + cy.ensureTextNotPresent(`Token Description${test_id}`); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/manage_policies.js b/smoke-test/tests/cypress/cypress/e2e/settings/manage_policies.js index 0e69a4e7f287a..98d66f4fc5102 100644 --- a/smoke-test/tests/cypress/cypress/e2e/settings/manage_policies.js +++ b/smoke-test/tests/cypress/cypress/e2e/settings/manage_policies.js @@ -4,19 +4,15 @@ const platform_policy_edited = `Platform test policy ${test_id} EDITED`; const metadata_policy_name = `Metadata test policy ${test_id}`; const metadata_policy_edited = `Metadata test policy ${test_id} EDITED`; - - function searchAndToggleMetadataPolicyStatus(metadataPolicyName, targetStatus) { - cy.get('[data-testid="search-input"]').should('be.visible'); + cy.get('[data-testid="search-input"]').should("be.visible"); cy.get('[data-testid="search-input"]').eq(1).type(metadataPolicyName); - cy.contains('tr', metadataPolicyName).as('metadataPolicyRow'); + cy.contains("tr", metadataPolicyName).as("metadataPolicyRow"); cy.contains(targetStatus).click(); } function clickFocusAndType(Id, text) { - cy.clickOptionWithTestId(Id) - .focused().clear() - .type(text); + cy.clickOptionWithTestId(Id).focused().clear().type(text); } function updateAndSave(Id, groupName, text) { @@ -30,20 +26,26 @@ function clickOnButton(saveButton) { } function 
createPolicy(decription, policyName) { - clickFocusAndType("policy-description", decription) + clickFocusAndType("policy-description", decription); clickOnButton("nextButton"); - updateAndSave("privileges", "All", "All Privileges", "nextButton") + updateAndSave("privileges", "All", "All Privileges", "nextButton"); clickOnButton("nextButton"); - updateAndSave("users", "All", "All Users") - updateAndSave("groups", "All", "All Groups") + updateAndSave("users", "All", "All Users"); + updateAndSave("groups", "All", "All Groups"); clickOnButton("saveButton"); cy.waitTextVisible("Successfully saved policy."); cy.waitTextVisible(policyName); } -function editPolicy(policyName, editPolicy, description, policyEdited, visibleDiscription) { - searchAndToggleMetadataPolicyStatus(policyName, 'EDIT') - cy.clickOptionWithTestId("policy-name") +function editPolicy( + policyName, + editPolicy, + description, + policyEdited, + visibleDiscription, +) { + searchAndToggleMetadataPolicyStatus(policyName, "EDIT"); + cy.clickOptionWithTestId("policy-name"); cy.focused().clear().type(editPolicy); cy.clickOptionWithTestId("policy-description"); cy.focused().clear().type(description); @@ -52,15 +54,15 @@ function editPolicy(policyName, editPolicy, description, policyEdited, visibleDi clickOnButton("saveButton"); cy.waitTextVisible("Successfully saved policy."); cy.waitTextVisible(policyEdited); - cy.waitTextVisible(visibleDiscription);; + cy.waitTextVisible(visibleDiscription); } function deletePolicy(policyEdited, deletePolicy) { - searchAndToggleMetadataPolicyStatus(policyEdited, 'DEACTIVATE') - cy.waitTextVisible("Successfully deactivated policy.") - cy.contains('DEACTIVATE').should('not.exist') - cy.contains('ACTIVATE').click(); - cy.waitTextVisible("Successfully activated policy.") + searchAndToggleMetadataPolicyStatus(policyEdited, "DEACTIVATE"); + cy.waitTextVisible("Successfully deactivated policy."); + cy.contains("DEACTIVATE").should("not.exist"); + cy.contains("ACTIVATE").click(); + 
cy.waitTextVisible("Successfully activated policy."); cy.get("[data-icon='delete']").click(); cy.waitTextVisible(deletePolicy); cy.clickOptionWithText("Yes"); @@ -77,37 +79,58 @@ describe("create and manage platform and metadata policies", () => { it("create platform policy", () => { cy.waitTextVisible("Manage Permissions"); cy.clickOptionWithText("Create new policy"); - clickFocusAndType("policy-name", platform_policy_name) + clickFocusAndType("policy-name", platform_policy_name); cy.get('[data-testid="policy-type"] [title="Metadata"]').click(); cy.clickOptionWithTestId("platform"); - createPolicy(`Platform policy description ${test_id}`, platform_policy_name) + createPolicy( + `Platform policy description ${test_id}`, + platform_policy_name, + ); }); it("edit platform policy", () => { - editPolicy(`${platform_policy_name}`, platform_policy_edited, + editPolicy( + `${platform_policy_name}`, + platform_policy_edited, + `Platform policy description ${test_id} EDITED`, + platform_policy_edited, `Platform policy description ${test_id} EDITED`, - platform_policy_edited, `Platform policy description ${test_id} EDITED`) + ); }); it("deactivate and activate platform policy", () => { - deletePolicy(`${platform_policy_edited}`, `Delete ${platform_policy_edited}`, `${platform_policy_edited}`) + deletePolicy( + `${platform_policy_edited}`, + `Delete ${platform_policy_edited}`, + `${platform_policy_edited}`, + ); }); it("create metadata policy", () => { cy.clickOptionWithText("Create new policy"); - clickFocusAndType("policy-name", metadata_policy_name) - cy.get('[data-testid="policy-type"]').should('have.text', 'Metadata'); - createPolicy(`Metadata policy description ${test_id}`, metadata_policy_name) + clickFocusAndType("policy-name", metadata_policy_name); + cy.get('[data-testid="policy-type"]').should("have.text", "Metadata"); + createPolicy( + `Metadata policy description ${test_id}`, + metadata_policy_name, + ); }); it("edit metadata policy", () => { - 
editPolicy(`${metadata_policy_name}`, metadata_policy_edited, + editPolicy( + `${metadata_policy_name}`, + metadata_policy_edited, `Metadata policy description ${test_id} EDITED`, - metadata_policy_edited, `Metadata policy description ${test_id} EDITED`) + metadata_policy_edited, + `Metadata policy description ${test_id} EDITED`, + ); }); it("deactivate and activate metadata policy", () => { - deletePolicy(`${metadata_policy_name}`, `Delete ${metadata_policy_name}`, `${metadata_policy_edited}`) + deletePolicy( + `${metadata_policy_name}`, + `Delete ${metadata_policy_name}`, + `${metadata_policy_edited}`, + ); }); - -}); \ No newline at end of file +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js index d9f69cd9a5ec4..247a9c8b9b273 100644 --- a/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js +++ b/smoke-test/tests/cypress/cypress/e2e/settings/managing_groups.js @@ -1,127 +1,132 @@ const test_id = Math.floor(Math.random() * 100000); const username = `Example Name ${test_id}`; -const email = `example${test_id}@example.com` -const password = "Example password" +const email = `example${test_id}@example.com`; +const password = "Example password"; const group_name = `Test group ${test_id}`; describe("create and manage group", () => { - it("add test user", () => { - cy.loginWithCredentials(); - cy.visit("/settings/identities/users"); - cy.waitTextVisible("Invite Users"); - cy.clickOptionWithText("Invite Users"); - cy.waitTextVisible(/signup\?invite_token=\w{32}/).then(($elem) => { - const inviteLink = $elem.text(); - cy.visit("/settings/identities/users"); - cy.logout(); - cy.visit(inviteLink); - cy.enterTextInTestId("email", email); - cy.enterTextInTestId("name", username); - cy.enterTextInTestId("password", password); - cy.enterTextInTestId("confirmPassword", password); - cy.mouseover("#title").click(); - cy.waitTextVisible("Other").click(); - 
cy.get("[type=submit]").click(); - cy.waitTextVisible("Welcome to DataHub"); - cy.hideOnboardingTour(); - cy.waitTextVisible(username); - }) + it("add test user", () => { + cy.loginWithCredentials(); + cy.visit("/settings/identities/users"); + cy.waitTextVisible("Invite Users"); + cy.clickOptionWithText("Invite Users"); + cy.waitTextVisible(/signup\?invite_token=\w{32}/).then(($elem) => { + const inviteLink = $elem.text(); + cy.visit("/settings/identities/users"); + cy.logout(); + cy.visit(inviteLink); + cy.enterTextInTestId("email", email); + cy.enterTextInTestId("name", username); + cy.enterTextInTestId("password", password); + cy.enterTextInTestId("confirmPassword", password); + cy.mouseover("#title").click(); + cy.waitTextVisible("Other").click(); + cy.get("[type=submit]").click(); + cy.waitTextVisible("Welcome to DataHub"); + cy.hideOnboardingTour(); + cy.waitTextVisible(username); }); + }); - it("create a group", () => { - cy.loginWithCredentials(); - cy.visit("/settings/identities/groups") - cy.waitTextVisible("Create group"); - cy.clickOptionWithText("Create group"); - cy.waitTextVisible("Create new group"); - cy.get("#name").type(group_name); - cy.get("#description").type("Test group description"); - cy.contains("Advanced").click(); - cy.waitTextVisible("Group Id"); - cy.get("#groupId").type(test_id); - cy.get("#createGroupButton").click(); - cy.waitTextVisible("Created group!"); - cy.waitTextVisible(group_name); - }); + it("create a group", () => { + cy.loginWithCredentials(); + cy.visit("/settings/identities/groups"); + cy.waitTextVisible("Create group"); + cy.clickOptionWithText("Create group"); + cy.waitTextVisible("Create new group"); + cy.get("#name").type(group_name); + cy.get("#description").type("Test group description"); + cy.contains("Advanced").click(); + cy.waitTextVisible("Group Id"); + cy.get("#groupId").type(test_id); + cy.get("#createGroupButton").click(); + cy.waitTextVisible("Created group!"); + cy.waitTextVisible(group_name); + }); - 
it("add test user to a group", () => { - cy.loginWithCredentials(); - cy.visit("/settings/identities/users"); - cy.get(".ant-tabs-tab-btn").contains("Groups").click(); - cy.clickOptionWithText(group_name); - cy.get(".ant-typography").contains(group_name).should("be.visible"); - cy.get(".ant-tabs-tab").contains("Members").click(); - cy.waitTextVisible("No members in this group yet."); - cy.clickOptionWithText("Add Member"); - cy.contains("Search for users...").click({ force: true }); - cy.focused().type(username); - cy.get(".ant-select-item-option").contains(username).click(); - cy.focused().blur(); - cy.contains(username).should("have.length", 1); - cy.get('[role="dialog"] button').contains("Add").click({ force: true }); - cy.waitTextVisible("Group members added!"); - cy.contains(username, {timeout: 10000}).should("be.visible"); - }); + it("add test user to a group", () => { + cy.loginWithCredentials(); + cy.visit("/settings/identities/users"); + cy.get(".ant-tabs-tab-btn").contains("Groups").click(); + cy.clickOptionWithText(group_name); + cy.get(".ant-typography").contains(group_name).should("be.visible"); + cy.get(".ant-tabs-tab").contains("Members").click(); + cy.waitTextVisible("No members in this group yet."); + cy.clickOptionWithText("Add Member"); + cy.contains("Search for users...").click({ force: true }); + cy.focused().type(username); + cy.get(".ant-select-item-option").contains(username).click(); + cy.focused().blur(); + cy.contains(username).should("have.length", 1); + cy.get('[role="dialog"] button').contains("Add").click({ force: true }); + cy.waitTextVisible("Group members added!"); + cy.contains(username, { timeout: 10000 }).should("be.visible"); + }); - it("update group info", () => { - cy.loginWithCredentials(); - cy.visit("/settings/identities/groups"); - cy.clickOptionWithText(group_name); - cy.contains(group_name).find('[aria-label="Edit"]').click(); - cy.focused().clear().type(`Test group EDITED ${test_id}{enter}`); - cy.waitTextVisible("Name 
Updated"); - cy.contains(`Test group EDITED ${test_id}`).should("be.visible"); - cy.get('[data-testid="edit-icon"]').click(); - cy.waitTextVisible("Edit Description"); - cy.get("#description").should("be.visible").type(" EDITED"); - cy.get("#updateGroupButton").click(); - cy.waitTextVisible("Changes saved."); - cy.contains("Test group description EDITED").should("be.visible"); - cy.clickOptionWithText("Add Owners"); - cy.get('[id="owner"]').click({ force: true }); - cy.focused().type(username); - cy.get(".ant-select-item-option").contains(username, { matchCase: false }).click(); - cy.focused().blur(); - cy.contains(username, { matchCase: false }).should("have.length", 1); - cy.get('[role="dialog"] button').contains("Done").click(); - cy.waitTextVisible("Owners Added"); - cy.contains(username, { matchCase: false }).should("be.visible"); - cy.clickOptionWithText("Edit Group"); - cy.waitTextVisible("Edit Profile"); - cy.get("#email").type(`${test_id}@testemail.com`); - cy.get("#slack").type(`#${test_id}`); - cy.clickOptionWithText("Save Changes"); - cy.waitTextVisible("Changes saved."); - cy.waitTextVisible(`${test_id}@testemail.com`); - cy.waitTextVisible(`#${test_id}`); - }); + it("update group info", () => { + cy.loginWithCredentials(); + cy.visit("/settings/identities/groups"); + cy.clickOptionWithText(group_name); + cy.contains(group_name).find('[aria-label="Edit"]').click(); + cy.focused().clear().type(`Test group EDITED ${test_id}{enter}`); + cy.waitTextVisible("Name Updated"); + cy.contains(`Test group EDITED ${test_id}`).should("be.visible"); + cy.get('[data-testid="edit-icon"]').click(); + cy.waitTextVisible("Edit Description"); + cy.get("#description").should("be.visible").type(" EDITED"); + cy.get("#updateGroupButton").click(); + cy.waitTextVisible("Changes saved."); + cy.contains("Test group description EDITED").should("be.visible"); + cy.clickOptionWithText("Add Owners"); + cy.get('[id="owner"]').click({ force: true }); + cy.focused().type(username); + 
cy.get(".ant-select-item-option") + .contains(username, { matchCase: false }) + .click(); + cy.focused().blur(); + cy.contains(username, { matchCase: false }).should("have.length", 1); + cy.get('[role="dialog"] button').contains("Done").click(); + cy.waitTextVisible("Owners Added"); + cy.contains(username, { matchCase: false }).should("be.visible"); + cy.clickOptionWithText("Edit Group"); + cy.waitTextVisible("Edit Profile"); + cy.get("#email").type(`${test_id}@testemail.com`); + cy.get("#slack").type(`#${test_id}`); + cy.clickOptionWithText("Save Changes"); + cy.waitTextVisible("Changes saved."); + cy.waitTextVisible(`${test_id}@testemail.com`); + cy.waitTextVisible(`#${test_id}`); + }); - it("test User verify group participation", () => { - cy.loginWithCredentials(); - cy.visit("/settings/identities/groups"); - cy.hideOnboardingTour(); - cy.clickOptionWithText(`Test group EDITED ${test_id}`); - cy.get(".ant-tabs-tab").contains("Members").click(); - cy.waitTextVisible(username); - }); + it("test User verify group participation", () => { + cy.loginWithCredentials(); + cy.visit("/settings/identities/groups"); + cy.hideOnboardingTour(); + cy.clickOptionWithText(`Test group EDITED ${test_id}`); + cy.get(".ant-tabs-tab").contains("Members").click(); + cy.waitTextVisible(username); + }); - it("assign role to group ", () => { - cy.loginWithCredentials(); - cy.visit("/settings/identities/groups"); - cy.get(`[href="/group/urn:li:corpGroup:${test_id}"]`).next().click() - cy.get('.ant-select-item-option').contains('Admin').click() - cy.get('button.ant-btn-primary').contains('OK').click(); - cy.get(`[href="/group/urn:li:corpGroup:${test_id}"]`).waitTextVisible('Admin'); - }); - - it("remove group", () => { - cy.loginWithCredentials(); - cy.visit("/settings/identities/groups"); - cy.get(`[href="/group/urn:li:corpGroup:${test_id}"]`).openThreeDotDropdown() - cy.clickOptionWithText("Delete"); - cy.clickOptionWithText("Yes"); - cy.waitTextVisible("Deleted Group!"); - 
cy.ensureTextNotPresent(`Test group EDITED ${test_id}`); - }); + it("assign role to group ", () => { + cy.loginWithCredentials(); + cy.visit("/settings/identities/groups"); + cy.get(`[href="/group/urn:li:corpGroup:${test_id}"]`).next().click(); + cy.get(".ant-select-item-option").contains("Admin").click(); + cy.get("button.ant-btn-primary").contains("OK").click(); + cy.get(`[href="/group/urn:li:corpGroup:${test_id}"]`).waitTextVisible( + "Admin", + ); + }); -}); \ No newline at end of file + it("remove group", () => { + cy.loginWithCredentials(); + cy.visit("/settings/identities/groups"); + cy.get( + `[href="/group/urn:li:corpGroup:${test_id}"]`, + ).openThreeDotDropdown(); + cy.clickOptionWithText("Delete"); + cy.clickOptionWithText("Yes"); + cy.waitTextVisible("Deleted Group!"); + cy.ensureTextNotPresent(`Test group EDITED ${test_id}`); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/siblings/siblings.js b/smoke-test/tests/cypress/cypress/e2e/siblings/siblings.js index f89b70b7a7d23..b6b6fea1a7d70 100644 --- a/smoke-test/tests/cypress/cypress/e2e/siblings/siblings.js +++ b/smoke-test/tests/cypress/cypress/e2e/siblings/siblings.js @@ -1,132 +1,157 @@ -describe('siblings', () => { - it('will merge metadata to non-primary sibling', () => { +describe("siblings", () => { + it("will merge metadata to non-primary sibling", () => { cy.login(); - cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false'); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false", + ); // check merged platforms - cy.contains('dbt & BigQuery'); + cy.contains("dbt & BigQuery"); // check merged schema (from dbt) - cy.contains('This is a unique identifier for a customer'); + cy.contains("This is a unique identifier for a customer"); // check merged profile (from bigquery) - cy.contains('Stats').click({ force: true }); + 
cy.contains("Stats").click({ force: true }); cy.get('[data-testid="table-stats-rowcount"]').contains("100"); - }); + }); - it('will merge metadata to primary sibling', () => { + it("will merge metadata to primary sibling", () => { cy.login(); - cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false'); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false", + ); // check merged platforms - cy.contains('dbt & BigQuery'); + cy.contains("dbt & BigQuery"); // check merged schema (from dbt) - cy.contains('This is a unique identifier for a customer'); + cy.contains("This is a unique identifier for a customer"); // check merged profile (from bigquery) - cy.contains('Stats').click({ force: true }); + cy.contains("Stats").click({ force: true }); cy.get('[data-testid="table-stats-rowcount"]').contains("100"); }); - it('can view individual nodes', () => { + it("can view individual nodes", () => { cy.login(); - const resizeObserverLoopErrRe = /^[^(ResizeObserver loop limit exceeded)]/ - cy.on('uncaught:exception', (err) => { - /* returning false here prevents Cypress from failing the test */ - if (resizeObserverLoopErrRe.test(err.message)) { - return false - } - }) + const resizeObserverLoopErrRe = /^[^(ResizeObserver loop limit exceeded)]/; + cy.on("uncaught:exception", (err) => { + /* returning false here prevents Cypress from failing the test */ + if (resizeObserverLoopErrRe.test(err.message)) { + return false; + } + }); - cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false'); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false", + ); // navigate to the bq entity - 
cy.clickOptionWithTestId('compact-entity-link-urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)'); + cy.clickOptionWithTestId( + "compact-entity-link-urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", + ); // check merged platforms is not shown - cy.get('[data-testid="entity-header-test-id"]').contains('dbt & BigQuery').should('not.exist'); - cy.get('[data-testid="entity-header-test-id"]').contains('BigQuery'); + cy.get('[data-testid="entity-header-test-id"]') + .contains("dbt & BigQuery") + .should("not.exist"); + cy.get('[data-testid="entity-header-test-id"]').contains("BigQuery"); // check dbt schema descriptions not shown - cy.contains('This is a unique identifier for a customer').should('not.exist'); + cy.contains("This is a unique identifier for a customer").should( + "not.exist", + ); // check merged profile still there (from bigquery) - cy.contains('Stats').click({ force: true }); + cy.contains("Stats").click({ force: true }); cy.get('[data-testid="table-stats-rowcount"]').contains("100"); }); - it('can mutate at individual node or combined node level', () => { + it("can mutate at individual node or combined node level", () => { cy.login(); - cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false'); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false", + ); // navigate to the bq entity - cy.clickOptionWithTestId('compact-entity-link-urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)'); + cy.clickOptionWithTestId( + "compact-entity-link-urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", + ); - cy.clickOptionWithText('Add Term'); + cy.clickOptionWithText("Add Term"); - cy.selectOptionInTagTermModal('CypressTerm'); + 
cy.selectOptionInTagTermModal("CypressTerm"); - cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false'); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)/?is_lineage_mode=false", + ); - cy.get('a[href="/glossaryTerm/urn:li:glossaryTerm:CypressNode.CypressTerm"]').within(() => cy.get('span[aria-label=close]').click()); - cy.clickOptionWithText('Yes'); + cy.get( + 'a[href="/glossaryTerm/urn:li:glossaryTerm:CypressNode.CypressTerm"]', + ).within(() => cy.get("span[aria-label=close]").click()); + cy.clickOptionWithText("Yes"); - cy.contains('CypressTerm').should('not.exist'); + cy.contains("CypressTerm").should("not.exist"); }); - it('will combine results in search', () => { + it("will combine results in search", () => { cy.login(); - cy.visit('/search?page=1&query=raw_orders'); + cy.visit("/search?page=1&query=raw_orders"); - cy.contains('Showing 1 - 10 of '); + cy.contains("Showing 1 - 10 of "); - cy.get('.test-search-result').should('have.length', 5); - cy.get('.test-search-result-sibling-section').should('have.length', 5); + cy.get(".test-search-result").should("have.length", 5); + cy.get(".test-search-result-sibling-section").should("have.length", 5); - cy.get('.test-search-result-sibling-section').get('.test-mini-preview-class:contains(raw_orders)').should('have.length', 2); + cy.get(".test-search-result-sibling-section") + .get(".test-mini-preview-class:contains(raw_orders)") + .should("have.length", 2); }); - it('will combine results in lineage', () => { + it("will combine results in lineage", () => { cy.login(); - cy.visit('dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)/?is_lineage_mode=true'); + cy.visit( + "dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)/?is_lineage_mode=true", + ); // check the subtypes - 
cy.get('text:contains(Table)').should('have.length', 2); - cy.get('text:contains(Seed)').should('have.length', 1); + cy.get("text:contains(Table)").should("have.length", 2); + cy.get("text:contains(Seed)").should("have.length", 1); // check the names - cy.get('text:contains(raw_orders)').should('have.length', 1); - cy.get('text:contains(customers)').should('have.length', 1); + cy.get("text:contains(raw_orders)").should("have.length", 1); + cy.get("text:contains(customers)").should("have.length", 1); // center counts twice since we secretely render two center nodes - cy.get('text:contains(stg_orders)').should('have.length', 2); + cy.get("text:contains(stg_orders)").should("have.length", 2); // check the platform - cy.get('svg').get('text:contains(dbt & BigQuery)').should('have.length', 5); + cy.get("svg").get("text:contains(dbt & BigQuery)").should("have.length", 5); }); - it('can separate results in lineage if flag is set', () => { + it("can separate results in lineage if flag is set", () => { cy.login(); - cy.visit('dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)/?is_lineage_mode=true'); + cy.visit( + "dataset/urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)/?is_lineage_mode=true", + ); - cy.clickOptionWithTestId('compress-lineage-toggle'); + cy.clickOptionWithTestId("compress-lineage-toggle"); // check the subtypes - cy.get('[data-testid="Seed"]').should('have.length', 1); + cy.get('[data-testid="Seed"]').should("have.length", 1); // center counts twice since we secretely render two center nodes, plus the downstream bigquery - cy.get('[data-testid="View"]').should('have.length', 3); - cy.get('[data-testid="Table"]').should('have.length', 0); - + cy.get('[data-testid="View"]').should("have.length", 3); + cy.get('[data-testid="Table"]').should("have.length", 0); // check the names - cy.get('text:contains(raw_orders)').should('have.length', 1); + 
cy.get("text:contains(raw_orders)").should("have.length", 1); // center counts twice since we secretely render two center nodes, plus the downstream bigquery - cy.get('text:contains(stg_orders)').should('have.length', 3); + cy.get("text:contains(stg_orders)").should("have.length", 3); // check the platform - cy.get('svg').get('text:contains(dbt & BigQuery)').should('have.length', 0); - cy.get('svg').get('text:contains(dbt)').should('have.length', 3); - cy.get('svg').get('text:contains(BigQuery)').should('have.length', 1); + cy.get("svg").get("text:contains(dbt & BigQuery)").should("have.length", 0); + cy.get("svg").get("text:contains(dbt)").should("have.length", 3); + cy.get("svg").get("text:contains(BigQuery)").should("have.length", 1); }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/task_runs/task_runs.js b/smoke-test/tests/cypress/cypress/e2e/task_runs/task_runs.js index ffe1884201c42..536310e70a564 100644 --- a/smoke-test/tests/cypress/cypress/e2e/task_runs/task_runs.js +++ b/smoke-test/tests/cypress/cypress/e2e/task_runs/task_runs.js @@ -1,39 +1,42 @@ -describe('task runs', () => { - it('can visit dataset with runs aspect and verify the task run is present', () => { - cy.visit('/') - cy.login(); - cy.visit('/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)/Runs'); - - // the run data should not be there since the run wrote - cy.contains('manual__2022-03-30T11:35:08.970522+00:00') - cy.contains('Failed'); - - // inputs - cy.contains('fct_cypress_users_created_no_tag'); - - // outputs - cy.contains('SampleCypressHiveDataset'); - cy.contains('cypress_logging_events'); - - // task name - cy.contains('User Creations'); - }); - - it('can visit task with runs aspect and verify the task run is present', () => { - cy.visit('/') - cy.login(); - cy.visit('/tasks/urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_123)/Runs?is_lineage_mode=false'); - - // Verify the run data is there - 
cy.contains('manual__2022-03-30T11:35:08.970522+00:00'); - cy.contains('Failed'); - - // inputs - cy.contains('fct_cypress_users_created_no_tag'); - - // outputs - cy.contains('SampleCypressHiveDataset'); - cy.contains('cypress_logging_events'); - }); - -}) +describe("task runs", () => { + it("can visit dataset with runs aspect and verify the task run is present", () => { + cy.visit("/"); + cy.login(); + cy.visit( + "/dataset/urn:li:dataset:(urn:li:dataPlatform:hive,cypress_logging_events,PROD)/Runs", + ); + + // the run data should not be there since the run wrote + cy.contains("manual__2022-03-30T11:35:08.970522+00:00"); + cy.contains("Failed"); + + // inputs + cy.contains("fct_cypress_users_created_no_tag"); + + // outputs + cy.contains("SampleCypressHiveDataset"); + cy.contains("cypress_logging_events"); + + // task name + cy.contains("User Creations"); + }); + + it("can visit task with runs aspect and verify the task run is present", () => { + cy.visit("/"); + cy.login(); + cy.visit( + "/tasks/urn:li:dataJob:(urn:li:dataFlow:(airflow,cypress_dag_abc,PROD),cypress_task_123)/Runs?is_lineage_mode=false", + ); + + // Verify the run data is there + cy.contains("manual__2022-03-30T11:35:08.970522+00:00"); + cy.contains("Failed"); + + // inputs + cy.contains("fct_cypress_users_created_no_tag"); + + // outputs + cy.contains("SampleCypressHiveDataset"); + cy.contains("cypress_logging_events"); + }); +}); diff --git a/smoke-test/tests/cypress/cypress/e2e/views/manage_views.js b/smoke-test/tests/cypress/cypress/e2e/views/manage_views.js index 9f3039ab4de77..92b63348c1bde 100644 --- a/smoke-test/tests/cypress/cypress/e2e/views/manage_views.js +++ b/smoke-test/tests/cypress/cypress/e2e/views/manage_views.js @@ -1,36 +1,44 @@ describe("manage views", () => { - it("go to views settings page, create, edit, make default, delete a view", () => { - const viewName = "Test View" - - cy.login(); - cy.goToViewsSettings(); - - cy.clickOptionWithText("Create new View"); - 
cy.get(".ant-input-affix-wrapper > input[type='text']").first().type(viewName); - cy.clickOptionWithTestId("view-builder-save"); - - // Confirm that the test has been created. - cy.waitTextVisible("Test View"); - - // Now edit the View - cy.clickFirstOptionWithTestId("views-table-dropdown"); - cy.get('[data-testid="view-dropdown-edit"]').click({ force: true }); - cy.get(".ant-input-affix-wrapper > input[type='text']").first().clear().type("New View Name"); - cy.clickOptionWithTestId("view-builder-save"); - cy.waitTextVisible("New View Name"); - - // Now make the view the default - cy.clickFirstOptionWithTestId("views-table-dropdown"); - cy.get('[data-testid="view-dropdown-set-user-default"]').click({ force: true }); - - // Now unset as the default - cy.clickFirstOptionWithTestId("views-table-dropdown"); - cy.get('[data-testid="view-dropdown-remove-user-default"]').click({ force: true }); - - // Now delete the View - cy.clickFirstOptionWithTestId("views-table-dropdown"); - cy.get('[data-testid="view-dropdown-delete"]').click({ force: true }); - cy.clickOptionWithText("Yes"); + it("go to views settings page, create, edit, make default, delete a view", () => { + const viewName = "Test View"; + + cy.login(); + cy.goToViewsSettings(); + + cy.clickOptionWithText("Create new View"); + cy.get(".ant-input-affix-wrapper > input[type='text']") + .first() + .type(viewName); + cy.clickOptionWithTestId("view-builder-save"); + + // Confirm that the test has been created. 
+ cy.waitTextVisible("Test View"); + + // Now edit the View + cy.clickFirstOptionWithTestId("views-table-dropdown"); + cy.get('[data-testid="view-dropdown-edit"]').click({ force: true }); + cy.get(".ant-input-affix-wrapper > input[type='text']") + .first() + .clear() + .type("New View Name"); + cy.clickOptionWithTestId("view-builder-save"); + cy.waitTextVisible("New View Name"); + + // Now make the view the default + cy.clickFirstOptionWithTestId("views-table-dropdown"); + cy.get('[data-testid="view-dropdown-set-user-default"]').click({ + force: true, + }); + // Now unset as the default + cy.clickFirstOptionWithTestId("views-table-dropdown"); + cy.get('[data-testid="view-dropdown-remove-user-default"]').click({ + force: true, }); + + // Now delete the View + cy.clickFirstOptionWithTestId("views-table-dropdown"); + cy.get('[data-testid="view-dropdown-delete"]').click({ force: true }); + cy.clickOptionWithText("Yes"); + }); }); diff --git a/smoke-test/tests/cypress/cypress/e2e/views/view_select.js b/smoke-test/tests/cypress/cypress/e2e/views/view_select.js index 752f4c768ba40..09520818a2190 100644 --- a/smoke-test/tests/cypress/cypress/e2e/views/view_select.js +++ b/smoke-test/tests/cypress/cypress/e2e/views/view_select.js @@ -8,17 +8,16 @@ function openViewEditDropDownAndClickId(data_id) { describe("view select", () => { it("click view select, create view, clear view, make defaults, clear view", () => { cy.login(); - let randomNumber = Math.floor(Math.random() * 100000); + const randomNumber = Math.floor(Math.random() * 100000); const viewName = `Test View ${randomNumber}`; const newViewName = `New View Name ${randomNumber}`; // Resize Observer Loop warning can be safely ignored - ref. 
https://github.com/cypress-io/cypress/issues/22113 const resizeObserverLoopErrRe = "ResizeObserver loop limit exceeded"; - cy.on("uncaught:exception", (err) => { - if (err.message.includes(resizeObserverLoopErrRe)) { - return false; - } - }); + cy.on( + "uncaught:exception", + (err) => !err.message.includes(resizeObserverLoopErrRe), + ); cy.goToStarSearchList(); diff --git a/smoke-test/tests/cypress/cypress/plugins/index.js b/smoke-test/tests/cypress/cypress/plugins/index.js index 4dff56c3fcb20..161e6895aa260 100644 --- a/smoke-test/tests/cypress/cypress/plugins/index.js +++ b/smoke-test/tests/cypress/cypress/plugins/index.js @@ -19,5 +19,7 @@ module.exports = (on, config) => { // `on` is used to hook into various events Cypress emits // `config` is the resolved Cypress config - require('cypress-timestamps/plugin')(on); -} + + // eslint-disable-next-line global-require + require("cypress-timestamps/plugin")(on); +}; diff --git a/smoke-test/tests/cypress/cypress/support/commands.js b/smoke-test/tests/cypress/cypress/support/commands.js index c670e1b573245..b6aeccfeb81a5 100644 --- a/smoke-test/tests/cypress/cypress/support/commands.js +++ b/smoke-test/tests/cypress/cypress/support/commands.js @@ -13,48 +13,52 @@ import dayjs from "dayjs"; -function selectorWithtestId (id) { - return '[data-testid="' + id +'"]'; +function selectorWithtestId(id) { + return `[data-testid="${id}"]`; } -export function getTimestampMillisNumDaysAgo (numDays) { - return dayjs().subtract(numDays, 'day').valueOf(); +export function getTimestampMillisNumDaysAgo(numDays) { + return dayjs().subtract(numDays, "day").valueOf(); } - -Cypress.Commands.add('login', () => { +Cypress.Commands.add("login", () => { cy.request({ - method: 'POST', - url: '/logIn', + method: "POST", + url: "/logIn", body: { - username: Cypress.env('ADMIN_USERNAME'), - password: Cypress.env('ADMIN_PASSWORD'), + username: Cypress.env("ADMIN_USERNAME"), + password: Cypress.env("ADMIN_PASSWORD"), }, retryOnStatusCodeFailure: 
true, }); -}) +}); Cypress.Commands.add("loginWithCredentials", (username, password) => { - cy.visit('/'); - if (username,password) { - cy.get('input[data-testid=username]').type(username); - cy.get('input[data-testid=password]').type(password); + cy.visit("/"); + if ((username, password)) { + cy.get("input[data-testid=username]").type(username); + cy.get("input[data-testid=password]").type(password); } else { - cy.get('input[data-testid=username]').type(Cypress.env('ADMIN_USERNAME')); - cy.get('input[data-testid=password]').type(Cypress.env('ADMIN_PASSWORD')); + cy.get("input[data-testid=username]").type(Cypress.env("ADMIN_USERNAME")); + cy.get("input[data-testid=password]").type(Cypress.env("ADMIN_PASSWORD")); } - cy.contains('Sign In').click(); - cy.contains('Welcome back'); + cy.contains("Sign In").click(); + cy.contains("Welcome back"); }); -Cypress.Commands.add('deleteUrn', (urn) => { - cy.request({ method: 'POST', url: 'http://localhost:8080/entities?action=delete', body: { - urn - }, headers: { - "X-RestLi-Protocol-Version": "2.0.0", - "Content-Type": "application/json", - }}) -}) +Cypress.Commands.add("deleteUrn", (urn) => { + cy.request({ + method: "POST", + url: "http://localhost:8080/entities?action=delete", + body: { + urn, + }, + headers: { + "X-RestLi-Protocol-Version": "2.0.0", + "Content-Type": "application/json", + }, + }); +}); Cypress.Commands.add("logout", () => { cy.get(selectorWithtestId("manage-account-menu")).click(); @@ -107,91 +111,83 @@ Cypress.Commands.add("goToIngestionPage", () => { }); Cypress.Commands.add("goToDataset", (urn, dataset_name) => { - cy.visit( - "/dataset/" + urn - ); + cy.visit(`/dataset/${urn}`); cy.wait(5000); cy.waitTextVisible(dataset_name); }); Cypress.Commands.add("goToBusinessAttribute", (urn, attribute_name) => { - cy.visit( - "/business-attribute/" + urn - ); + cy.visit(`/business-attribute/${urn}`); cy.wait(5000); cy.waitTextVisible(attribute_name); }); Cypress.Commands.add("goToTag", (urn, tag_name) => { - 
cy.visit( - "/tag/" + urn - ); + cy.visit(`/tag/${urn}`); cy.wait(5000); cy.waitTextVisible(tag_name); }); Cypress.Commands.add("goToEntityLineageGraph", (entity_type, urn) => { - cy.visit( - `/${entity_type}/${urn}?is_lineage_mode=true` - ); -}) + cy.visit(`/${entity_type}/${urn}?is_lineage_mode=true`); +}); -Cypress.Commands.add("goToEntityLineageGraph", (entity_type, urn, start_time_millis, end_time_millis) => { - cy.visit( - `/${entity_type}/${urn}?is_lineage_mode=true&start_time_millis=${start_time_millis}&end_time_millis=${end_time_millis}` - ); -}) +Cypress.Commands.add( + "goToEntityLineageGraph", + (entity_type, urn, start_time_millis, end_time_millis) => { + cy.visit( + `/${entity_type}/${urn}?is_lineage_mode=true&start_time_millis=${start_time_millis}&end_time_millis=${end_time_millis}`, + ); + }, +); Cypress.Commands.add("lineageTabClickOnUpstream", () => { - cy.get('[data-testid="lineage-tab-direction-select-option-downstream"] > b').click(); - cy.get('[data-testid="lineage-tab-direction-select-option-upstream"] > b').click(); -}) - + cy.get( + '[data-testid="lineage-tab-direction-select-option-downstream"] > b', + ).click(); + cy.get( + '[data-testid="lineage-tab-direction-select-option-upstream"] > b', + ).click(); +}); Cypress.Commands.add("goToChart", (urn) => { - cy.visit( - "/chart/" + urn - ); -}) + cy.visit(`/chart/${urn}`); +}); Cypress.Commands.add("goToContainer", (urn) => { - cy.visit( - "/container/" + urn - ); -}) + cy.visit(`/container/${urn}`); +}); Cypress.Commands.add("goToDomain", (urn) => { - cy.visit( - "/domain/" + urn - ); -}) + cy.visit(`/domain/${urn}`); +}); Cypress.Commands.add("goToAnalytics", () => { cy.visit("/analytics"); - cy.contains("Data Landscape Summary", {timeout: 10000}); + cy.contains("Data Landscape Summary", { timeout: 10000 }); }); Cypress.Commands.add("goToUserList", () => { cy.visit("/settings/identities/users"); cy.waitTextVisible("Manage Users & Groups"); -}) +}); Cypress.Commands.add("goToStarSearchList", 
() => { - cy.visit("/search?query=%2A") - cy.waitTextVisible("Showing") - cy.waitTextVisible("results") -}) + cy.visit("/search?query=%2A"); + cy.waitTextVisible("Showing"); + cy.waitTextVisible("results"); +}); Cypress.Commands.add("openThreeDotDropdown", () => { - cy.clickOptionWithTestId("entity-header-dropdown") + cy.clickOptionWithTestId("entity-header-dropdown"); }); Cypress.Commands.add("openThreeDotMenu", () => { - cy.clickOptionWithTestId("three-dot-menu") + cy.clickOptionWithTestId("three-dot-menu"); }); Cypress.Commands.add("clickOptionWithText", (text) => { - cy.contains(text).should('be.visible').click(); + cy.contains(text).should("be.visible").click(); }); Cypress.Commands.add("clickFirstOptionWithText", (text) => { @@ -210,97 +206,100 @@ Cypress.Commands.add("deleteFromDropdown", () => { Cypress.Commands.add("addViaFormModal", (text, modelHeader) => { cy.waitTextVisible(modelHeader); - cy.get('.ProseMirror-focused').type(text); + cy.get(".ProseMirror-focused").type(text); cy.get(".ant-modal-footer > button:nth-child(2)").click(); }); Cypress.Commands.add("addViaModal", (text, modelHeader, value, dataTestId) => { cy.waitTextVisible(modelHeader); cy.get(".ant-input-affix-wrapper > input[type='text']").first().type(text); - cy.get('[data-testid="' + dataTestId + '"]').click(); - cy.contains(value).should('be.visible'); + cy.get(`[data-testid="${dataTestId}"]`).click(); + cy.contains(value).should("be.visible"); }); -Cypress.Commands.add("addBusinessAttributeViaModal", (text, modelHeader, value, dataTestId) => { - cy.waitTextVisible(modelHeader); - cy.get(".ant-input-affix-wrapper > input[type='text']").first().type(text); - cy.get('[data-testid="' + dataTestId + '"]').click(); - cy.wait(3000); - cy.contains(value).should('be.visible'); -}); +Cypress.Commands.add( + "addBusinessAttributeViaModal", + (text, modelHeader, value, dataTestId) => { + cy.waitTextVisible(modelHeader); + cy.get(".ant-input-affix-wrapper > input[type='text']").first().type(text); 
+ cy.get(`[data-testid="${dataTestId}"]`).click(); + cy.wait(3000); + cy.contains(value).should("be.visible"); + }, +); Cypress.Commands.add("ensureTextNotPresent", (text) => { cy.contains(text).should("not.exist"); }); Cypress.Commands.add("waitTextPresent", (text) => { - cy.contains(text).should('exist'); - cy.contains(text).should('have.length.above', 0); + cy.contains(text).should("exist"); + cy.contains(text).should("have.length.above", 0); return cy.contains(text); -}) +}); Cypress.Commands.add("waitTextVisible", (text) => { - cy.contains(text).should('exist'); - cy.contains(text).should('be.visible'); - cy.contains(text).should('have.length.above', 0); + cy.contains(text).should("exist"); + cy.contains(text).should("be.visible"); + cy.contains(text).should("have.length.above", 0); return cy.contains(text); -}) +}); Cypress.Commands.add("openMultiSelect", (data_id) => { - let selector = `${selectorWithtestId(data_id)}` - cy.get(`.ant-select${selector} > .ant-select-selector > .ant-select-selection-search`).click(); -}) + const selector = `${selectorWithtestId(data_id)}`; + cy.get( + `.ant-select${selector} > .ant-select-selector > .ant-select-selection-search`, + ).click(); +}); -Cypress.Commands.add( 'multiSelect', (within_data_id , text) => { +Cypress.Commands.add("multiSelect", (within_data_id, text) => { cy.openMultiSelect(within_data_id); cy.waitTextVisible(text); cy.clickOptionWithText(text); }); -Cypress.Commands.add("getWithTestId", (id) => { - return cy.get(selectorWithtestId(id)); -}); +Cypress.Commands.add("getWithTestId", (id) => cy.get(selectorWithtestId(id))); Cypress.Commands.add("clickOptionWithId", (id) => { - cy.get(id).click() -}) + cy.get(id).click(); +}); Cypress.Commands.add("enterTextInSpecificTestId", (id, value, text) => { cy.get(selectorWithtestId(id)).eq(value).type(text); -}) +}); Cypress.Commands.add("enterTextInTestId", (id, text) => { cy.get(selectorWithtestId(id)).type(text); -}) +}); 
Cypress.Commands.add("clickOptionWithTestId", (id) => { cy.get(selectorWithtestId(id)).first().click({ force: true, }); -}) +}); Cypress.Commands.add("clickFirstOptionWithTestId", (id) => { cy.get(selectorWithtestId(id)).first().click({ force: true, }); -}) +}); -Cypress.Commands.add("clickFirstOptionWithSpecificTestId", (id,value) => { +Cypress.Commands.add("clickFirstOptionWithSpecificTestId", (id, value) => { cy.get(selectorWithtestId(id)).eq(value).click({ force: true, }); -}) +}); Cypress.Commands.add("clickOptionWithSpecificClass", (locator, value) => { - cy.get(locator).should('be.visible') + cy.get(locator).should("be.visible"); cy.get(locator).eq(value).click(); -}) +}); Cypress.Commands.add("clickTextOptionWithClass", (locator, text) => { - cy.get(locator).should('be.visible').contains(text).click({force:true}) -}) + cy.get(locator).should("be.visible").contains(text).click({ force: true }); +}); Cypress.Commands.add("hideOnboardingTour", () => { - cy.get('body').type("{ctrl} {meta} h"); + cy.get("body").type("{ctrl} {meta} h"); }); Cypress.Commands.add("clearView", (viewName) => { @@ -308,75 +307,83 @@ Cypress.Commands.add("clearView", (viewName) => { cy.clickOptionWithTestId("view-select-clear"); cy.get("input[data-testid='search-input']").click(); cy.contains(viewName).should("not.be.visible"); -}) +}); -Cypress.Commands.add('addTermToDataset', (urn, dataset_name, term) => { +Cypress.Commands.add("addTermToDataset", (urn, dataset_name, term) => { cy.goToDataset(urn, dataset_name); cy.clickOptionWithText("Add Term"); cy.selectOptionInTagTermModal(term); cy.contains(term); }); -Cypress.Commands.add('addTermToBusinessAttribute', (urn, attribute_name, term) => { - cy.goToBusinessAttribute(urn, attribute_name); - cy.clickOptionWithText("Add Terms"); - cy.selectOptionInTagTermModal(term); - cy.contains(term); -}); - -Cypress.Commands.add('addAttributeToDataset', (urn, dataset_name, businessAttribute) => { - cy.goToDataset(urn, dataset_name); - 
cy.clickOptionWithText("event_name"); - cy.contains("Business Attribute"); - cy.get('[data-testid="schema-field-event_name-businessAttribute"]').within(() => - cy.contains("Add Attribute").click() - ); - cy.selectOptionInAttributeModal(businessAttribute); - cy.contains(businessAttribute); -}); - -Cypress.Commands.add('selectOptionInTagTermModal', (text) => { +Cypress.Commands.add( + "addTermToBusinessAttribute", + (urn, attribute_name, term) => { + cy.goToBusinessAttribute(urn, attribute_name); + cy.clickOptionWithText("Add Terms"); + cy.selectOptionInTagTermModal(term); + cy.contains(term); + }, +); + +Cypress.Commands.add( + "addAttributeToDataset", + (urn, dataset_name, businessAttribute) => { + cy.goToDataset(urn, dataset_name); + cy.clickOptionWithText("event_name"); + cy.contains("Business Attribute"); + cy.get('[data-testid="schema-field-event_name-businessAttribute"]').within( + () => cy.contains("Add Attribute").click(), + ); + cy.selectOptionInAttributeModal(businessAttribute); + cy.contains(businessAttribute); + }, +); + +Cypress.Commands.add("selectOptionInTagTermModal", (text) => { cy.enterTextInTestId("tag-term-modal-input", text); cy.clickOptionWithTestId("tag-term-option"); - let btn_id = "add-tag-term-from-modal-btn"; + const btn_id = "add-tag-term-from-modal-btn"; cy.clickOptionWithTestId(btn_id); cy.get(selectorWithtestId(btn_id)).should("not.exist"); }); -Cypress.Commands.add('selectOptionInAttributeModal', (text) => { +Cypress.Commands.add("selectOptionInAttributeModal", (text) => { cy.enterTextInTestId("business-attribute-modal-input", text); cy.clickOptionWithTestId("business-attribute-option"); - let btn_id = "add-attribute-from-modal-btn"; + const btn_id = "add-attribute-from-modal-btn"; cy.clickOptionWithTestId(btn_id); cy.get(selectorWithtestId(btn_id)).should("not.exist"); }); -Cypress.Commands.add("removeDomainFromDataset", (urn, dataset_name, domain_urn) => { - cy.goToDataset(urn, dataset_name); - cy.get('.sidebar-domain-section 
[href="/domain/' + domain_urn + '"] .anticon-close').click(); - cy.clickOptionWithText("Yes"); -}) +Cypress.Commands.add( + "removeDomainFromDataset", + (urn, dataset_name, domain_urn) => { + cy.goToDataset(urn, dataset_name); + cy.get( + `.sidebar-domain-section [href="/domain/${domain_urn}"] .anticon-close`, + ).click(); + cy.clickOptionWithText("Yes"); + }, +); Cypress.Commands.add("openEntityTab", (tab) => { - const selector = 'div[id$="' + tab + '"]:nth-child(1)' + const selector = `div[id$="${tab}"]:nth-child(1)`; cy.highlighElement(selector); - cy.get(selector).click() + cy.get(selector).click(); }); Cypress.Commands.add("highlighElement", (selector) => { cy.wait(3000); - cy.get(selector).then($button => { - $button.css('border', '1px solid magenta') - }) + cy.get(selector).then(($button) => { + $button.css("border", "1px solid magenta"); + }); cy.wait(3000); -}) +}); -Cypress.Commands.add("mouseover", (selector) => { - return cy.get(selector).trigger( - "mouseover", - { force: true } - ); -}) +Cypress.Commands.add("mouseover", (selector) => + cy.get(selector).trigger("mouseover", { force: true }), +); Cypress.Commands.add("createUser", (name, password, email) => { cy.visit("/settings/identities/users"); @@ -396,13 +403,13 @@ Cypress.Commands.add("createUser", (name, password, email) => { cy.waitTextVisible("Welcome to DataHub"); cy.hideOnboardingTour(); cy.waitTextVisible(name); - cy.logout() + cy.logout(); cy.loginWithCredentials(); - }) -}) + }); +}); Cypress.Commands.add("createGroup", (name, description, group_id) => { - cy.visit("/settings/identities/groups") + cy.visit("/settings/identities/groups"); cy.clickOptionWithText("Create group"); cy.waitTextVisible("Create new group"); cy.get("#name").type(name); @@ -413,10 +420,10 @@ Cypress.Commands.add("createGroup", (name, description, group_id) => { cy.get("#createGroupButton").click(); cy.waitTextVisible("Created group!"); cy.waitTextVisible(name); -}) +}); Cypress.Commands.add("addGroupMember", 
(group_name, group_urn, member_name) => { - cy.visit(group_urn) + cy.visit(group_urn); cy.clickOptionWithText(group_name); cy.contains(group_name).should("be.visible"); cy.get('[role="tab"]').contains("Members").click(); @@ -428,20 +435,21 @@ Cypress.Commands.add("addGroupMember", (group_name, group_urn, member_name) => { cy.contains(member_name).should("have.length", 1); cy.get('[role="dialog"] button').contains("Add").click({ force: true }); cy.waitTextVisible("Group members added!"); - cy.contains(member_name, {timeout: 10000}).should("be.visible"); -}) + cy.contains(member_name, { timeout: 10000 }).should("be.visible"); +}); Cypress.Commands.add("createGlossaryTermGroup", (term_group_name) => { cy.goToGlossaryList(); - cy.clickOptionWithText('Add Term Group'); + cy.clickOptionWithText("Add Term Group"); cy.waitTextVisible("Create Term Group"); cy.enterTextInTestId("create-glossary-entity-modal-name", term_group_name); cy.clickOptionWithTestId("glossary-entity-modal-create-button"); - cy.get('[data-testid="glossary-browser-sidebar"]').contains(term_group_name).should("be.visible"); + cy.get('[data-testid="glossary-browser-sidebar"]') + .contains(term_group_name) + .should("be.visible"); cy.waitTextVisible(`Created Term Group!`); }); - // // // -- This is a child command -- diff --git a/smoke-test/tests/cypress/cypress/support/e2e.js b/smoke-test/tests/cypress/cypress/support/e2e.js index 751649a8d6cd6..dee6d14ace4dd 100644 --- a/smoke-test/tests/cypress/cypress/support/e2e.js +++ b/smoke-test/tests/cypress/cypress/support/e2e.js @@ -14,14 +14,14 @@ // *********************************************************** // Import commands.js using ES2015 syntax: -import './commands' +import "./commands"; // Alternatively you can use CommonJS syntax: // require('./commands') // https://github.com/bahmutov/cypress-timestamps -require('cypress-timestamps/support')({ - terminal: true, // by default the terminal output is disabled - error: true, - commandLog: true, -}); \ No 
newline at end of file +require("cypress-timestamps/support")({ + terminal: true, // by default the terminal output is disabled + error: true, + commandLog: true, +}); diff --git a/smoke-test/tests/cypress/cypress_dbt_data.json b/smoke-test/tests/cypress/cypress_dbt_data.json index 087af0f3704c6..3e0c3a9a4d54a 100644 --- a/smoke-test/tests/cypress/cypress_dbt_data.json +++ b/smoke-test/tests/cypress/cypress_dbt_data.json @@ -1,5 +1,5 @@ [ -{ + { "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:b5e95fce839e7d78151ed7e0a7420d84", @@ -7,18 +7,18 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"bigquery\", \"instance\": \"PROD\", \"project_id\": \"cypress_project\"}, \"name\": \"cypress_project\"}", - "contentType": "application/json" + "value": "{\"customProperties\": {\"platform\": \"bigquery\", \"instance\": \"PROD\", \"project_id\": \"cypress_project\"}, \"name\": \"cypress_project\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162350940, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162350940, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:b5e95fce839e7d78151ed7e0a7420d84", @@ -26,18 +26,18 @@ "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:bigquery\"}", - "contentType": "application/json" + "value": "{\"platform\": \"urn:li:dataPlatform:bigquery\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162350941, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162350941, + 
"runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:b5e95fce839e7d78151ed7e0a7420d84", @@ -45,18 +45,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"Project\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"Project\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162350942, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162350942, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb", @@ -64,18 +64,18 @@ "changeType": "UPSERT", "aspectName": "containerProperties", "aspect": { - "value": "{\"customProperties\": {\"platform\": \"bigquery\", \"instance\": \"PROD\", \"project_id\": \"cypress_project\", \"dataset_id\": \"jaffle_shop\"}, \"name\": \"jaffle_shop\"}", - "contentType": "application/json" + "value": "{\"customProperties\": {\"platform\": \"bigquery\", \"instance\": \"PROD\", \"project_id\": \"cypress_project\", \"dataset_id\": \"jaffle_shop\"}, \"name\": \"jaffle_shop\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162353361, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162353361, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb", @@ -83,18 +83,18 @@ "changeType": "UPSERT", "aspectName": 
"dataPlatformInstance", "aspect": { - "value": "{\"platform\": \"urn:li:dataPlatform:bigquery\"}", - "contentType": "application/json" + "value": "{\"platform\": \"urn:li:dataPlatform:bigquery\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162353362, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162353362, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb", @@ -102,18 +102,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"Dataset\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"Dataset\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162353363, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162353363, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "container", "entityUrn": "urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb", @@ -121,18 +121,18 @@ "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:b5e95fce839e7d78151ed7e0a7420d84\"}", - "contentType": "application/json" + "value": "{\"container\": \"urn:li:container:b5e95fce839e7d78151ed7e0a7420d84\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162353364, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162353364, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + 
"registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", @@ -140,223 +140,223 @@ "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", - "contentType": "application/json" + "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162353970, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162353970, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", - "aspects": [ + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "externalUrl": null, + "name": "customers", + "qualifiedName": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "cypress_project.jaffle_shop.customers", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + 
"cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "customer_id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "first_name", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "last_name", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "first_order", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false + "fieldPath": "most_recent_order", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.DateType": {} } + }, + "nativeDataType": "DATE()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "customers", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] + "fieldPath": "number_of_orders", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "cypress_project.jaffle_shop.customers", - "platform": "urn:li:dataPlatform:bigquery", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "customer_id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "first_name", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - 
"com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "last_name", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "first_order", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "most_recent_order", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "number_of_orders", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "customer_lifetime_value", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - 
"lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Float()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + "fieldPath": "customer_lifetime_value", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Float()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null } - ] - } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + } + ] + } }, "proposedDelta": null, "systemMetadata": { - "lastObserved": 1655162353971, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162353971, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", @@ -364,18 +364,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162353980, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162353980, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": 
"dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers_source,PROD)", @@ -383,123 +383,123 @@ "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", - "contentType": "application/json" + "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162354204, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354204, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers_source,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers_source,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "externalUrl": null, + "name": "customers_source", + "qualifiedName": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "cypress_project.jaffle_shop.customers_source", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + 
"dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "customers_source", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] + "fieldPath": "source_name", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "cypress_project.jaffle_shop.customers_source", - "platform": "urn:li:dataPlatform:bigquery", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "source_name", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "siour", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - 
"recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + "fieldPath": "siour", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null } - ] - } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + } + ] + } }, "proposedDelta": null, "systemMetadata": { - "lastObserved": 1655162354206, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354206, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers_source,PROD)", @@ -507,18 +507,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162354211, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354211, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.orders,PROD)", @@ -526,263 +526,263 @@ "changeType": 
"UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", - "contentType": "application/json" + "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162354420, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354420, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.orders,PROD)", - "aspects": [ + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.orders,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "externalUrl": null, + "name": "orders", + "qualifiedName": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "cypress_project.jaffle_shop.orders", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "order_id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + 
"lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false + "fieldPath": "customer_id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "orders", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] + "fieldPath": "order_date", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} } + }, + "nativeDataType": "DATE()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "cypress_project.jaffle_shop.orders", - "platform": "urn:li:dataPlatform:bigquery", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "order_id", - "jsonPath": null, - "nullable": true, - "description": null, - 
"created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "customer_id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "order_date", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "status", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "credit_card_amount", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Float()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "coupon_amount", - "jsonPath": null, - "nullable": true, - 
"description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Float()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "bank_transfer_amount", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Float()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "gift_card_amount", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Float()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "amount", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Float()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + "fieldPath": "status", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + 
"jsonProps": null + }, + { + "fieldPath": "credit_card_amount", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "Float()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "coupon_amount", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "Float()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "bank_transfer_amount", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "Float()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "gift_card_amount", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "Float()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "amount", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "Float()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": 
false, + "isPartitioningKey": null, + "jsonProps": null } - ] - } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + } + ] + } }, "proposedDelta": null, "systemMetadata": { - "lastObserved": 1655162354421, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354421, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.orders,PROD)", @@ -790,18 +790,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162354427, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354427, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_customers,PROD)", @@ -809,143 +809,143 @@ "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", - "contentType": "application/json" + "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162354667, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354667, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": 
null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_customers,PROD)", - "aspects": [ + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "externalUrl": null, + "name": "raw_customers", + "qualifiedName": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "cypress_project.jaffle_shop.raw_customers", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false + "fieldPath": "id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "raw_customers", - "qualifiedName": null, - "description": null, - 
"uri": null, - "tags": [] + "fieldPath": "first_name", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "cypress_project.jaffle_shop.raw_customers", - "platform": "urn:li:dataPlatform:bigquery", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "first_name", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "last_name", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - 
}, - "nativeDataType": "String()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + "fieldPath": "last_name", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null } - ] - } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + } + ] + } }, "proposedDelta": null, "systemMetadata": { - "lastObserved": 1655162354668, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354668, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_customers,PROD)", @@ -953,18 +953,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162354671, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354671, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_orders,PROD)", 
@@ -972,163 +972,163 @@ "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", - "contentType": "application/json" + "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162354871, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354871, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_orders,PROD)", - "aspects": [ + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_orders,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "externalUrl": null, + "name": "raw_orders", + "qualifiedName": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "cypress_project.jaffle_shop.raw_orders", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.common.Status": { - "removed": 
false + "fieldPath": "id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "raw_orders", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] + "fieldPath": "user_id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "cypress_project.jaffle_shop.raw_orders", - "platform": "urn:li:dataPlatform:bigquery", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "user_id", - "jsonPath": null, - 
"nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "order_date", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "status", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + "fieldPath": "order_date", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} } + }, + "nativeDataType": "DATE()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "status", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + 
"jsonProps": null } - ] - } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + } + ] + } }, "proposedDelta": null, "systemMetadata": { - "lastObserved": 1655162354873, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354873, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_orders,PROD)", @@ -1136,18 +1136,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162354879, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162354879, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_payments,PROD)", @@ -1155,163 +1155,163 @@ "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", - "contentType": "application/json" + "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162355105, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162355105, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + 
"properties": null } -}, -{ + }, + { "auditHeader": null, "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_payments,PROD)", - "aspects": [ + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_payments,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "externalUrl": null, + "name": "raw_payments", + "qualifiedName": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "cypress_project.jaffle_shop.raw_payments", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false + "fieldPath": "id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": {}, - "externalUrl": null, - "name": "raw_payments", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] + "fieldPath": 
"order_id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "cypress_project.jaffle_shop.raw_payments", - "platform": "urn:li:dataPlatform:bigquery", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "order_id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "payment_method", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - 
"recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "amount", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + "fieldPath": "payment_method", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "amount", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null } - ] - } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + } + ] + } }, "proposedDelta": null, "systemMetadata": { - "lastObserved": 1655162355107, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162355107, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_payments,PROD)", @@ -1319,18 +1319,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"table\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"table\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162355113, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162355113, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_customers,PROD)", @@ -1338,146 +1338,146 @@ "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", - "contentType": "application/json" + "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162355777, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162355777, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_customers,PROD)", - "aspects": [ + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { 
+ "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "with source as (\n select * from `cypress_project`.`jaffle_shop`.`raw_customers`\n\n),\n\nrenamed as (\n\n select\n id as customer_id,\n first_name,\n last_name\n\n from source\n\n)\n\nselect * from renamed", + "is_view": "True" + }, + "externalUrl": null, + "name": "stg_customers", + "qualifiedName": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "cypress_project.jaffle_shop.stg_customers", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false + "fieldPath": "customer_id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "view_definition": "with source as (\n select * from `cypress_project`.`jaffle_shop`.`raw_customers`\n\n),\n\nrenamed as (\n\n select\n id as customer_id,\n first_name,\n last_name\n\n from source\n\n)\n\nselect * from renamed", - "is_view": "True" - }, - "externalUrl": null, - "name": "stg_customers", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] + "fieldPath": "first_name", + 
"jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "cypress_project.jaffle_shop.stg_customers", - "platform": "urn:li:dataPlatform:bigquery", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "customer_id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "first_name", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "last_name", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - "recursive": 
false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + "fieldPath": "last_name", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null } - ] - } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + } + ] + } }, "proposedDelta": null, "systemMetadata": { - "lastObserved": 1655162355778, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162355778, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_customers,PROD)", @@ -1485,18 +1485,18 @@ "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_customers,PROD)\", \"type\": \"TRANSFORMED\"}]}", - "contentType": "application/json" + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_customers,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162355783, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - 
"registryVersion": null, - "properties": null + "lastObserved": 1655162355783, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_customers,PROD)", @@ -1504,18 +1504,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"view\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"view\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162355784, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162355784, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_customers,PROD)", @@ -1523,18 +1523,18 @@ "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { - "value": "{\"materialized\": false, \"viewLogic\": \"with source as (\\n select * from `cypress_project`.`jaffle_shop`.`raw_customers`\\n\\n),\\n\\nrenamed as (\\n\\n select\\n id as customer_id,\\n first_name,\\n last_name\\n\\n from source\\n\\n)\\n\\nselect * from renamed\", \"viewLanguage\": \"SQL\"}", - "contentType": "application/json" + "value": "{\"materialized\": false, \"viewLogic\": \"with source as (\\n select * from `cypress_project`.`jaffle_shop`.`raw_customers`\\n\\n),\\n\\nrenamed as (\\n\\n select\\n id as customer_id,\\n first_name,\\n last_name\\n\\n from source\\n\\n)\\n\\nselect * from renamed\", \"viewLanguage\": \"SQL\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162355785, - "runId": "bigquery-2022_06_13-16_18_59", - 
"registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162355785, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", @@ -1542,166 +1542,166 @@ "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", - "contentType": "application/json" + "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162356113, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162356113, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", - "aspects": [ + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "view_definition": "with source as (\n select * from `cypress_project`.`jaffle_shop`.`raw_orders`\n\n),\n\nrenamed as (\n\n select\n id as order_id,\n user_id as customer_id,\n order_date,\n status\n\n from source\n\n)\n\nselect * from renamed", + "is_view": "True" + }, + "externalUrl": null, + "name": "stg_orders", + "qualifiedName": null, + "description": 
null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "cypress_project.jaffle_shop.stg_orders", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "order_id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false + "fieldPath": "customer_id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "view_definition": "with source as (\n select * from `cypress_project`.`jaffle_shop`.`raw_orders`\n\n),\n\nrenamed as (\n\n select\n id as order_id,\n user_id as customer_id,\n order_date,\n status\n\n from source\n\n)\n\nselect * from renamed", - "is_view": "True" - }, - "externalUrl": null, - "name": "stg_orders", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] + "fieldPath": "order_date", + "jsonPath": null, + 
"nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} } + }, + "nativeDataType": "DATE()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "cypress_project.jaffle_shop.stg_orders", - "platform": "urn:li:dataPlatform:bigquery", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "order_id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "customer_id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "order_date", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE()", - "recursive": false, - "globalTags": null, - 
"glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "status", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + "fieldPath": "status", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null } - ] - } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + } + ] + } }, "proposedDelta": null, "systemMetadata": { - "lastObserved": 1655162356115, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162356115, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", @@ -1709,18 +1709,18 @@ "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_orders,PROD)\", \"type\": \"TRANSFORMED\"}]}", - "contentType": "application/json" + "value": "{\"upstreams\": [{\"auditStamp\": 
{\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_orders,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162356123, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162356123, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", @@ -1728,18 +1728,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"view\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"view\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162356124, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162356124, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", @@ -1747,18 +1747,18 @@ "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { - "value": "{\"materialized\": false, \"viewLogic\": \"with source as (\\n select * from `cypress_project`.`jaffle_shop`.`raw_orders`\\n\\n),\\n\\nrenamed as (\\n\\n select\\n id as order_id,\\n user_id as customer_id,\\n order_date,\\n status\\n\\n from source\\n\\n)\\n\\nselect * from renamed\", \"viewLanguage\": \"SQL\"}", - "contentType": "application/json" + "value": "{\"materialized\": false, \"viewLogic\": \"with source as (\\n select * from 
`cypress_project`.`jaffle_shop`.`raw_orders`\\n\\n),\\n\\nrenamed as (\\n\\n select\\n id as order_id,\\n user_id as customer_id,\\n order_date,\\n status\\n\\n from source\\n\\n)\\n\\nselect * from renamed\", \"viewLanguage\": \"SQL\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162356125, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162356125, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", @@ -1766,166 +1766,166 @@ "changeType": "UPSERT", "aspectName": "container", "aspect": { - "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", - "contentType": "application/json" + "value": "{\"container\": \"urn:li:container:348c96555971d3f5c1ffd7dd2e7446cb\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162356440, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162356440, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", - "aspects": [ + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + 
"customProperties": { + "view_definition": "with source as (\n select * from `cypress_project`.`jaffle_shop`.`raw_payments`\n\n),\n\nrenamed as (\n\n select\n id as payment_id,\n order_id,\n payment_method,\n\n --`amount` is currently stored in cents, so we convert it to dollars\n amount / 100 as amount\n\n from source\n\n)\n\nselect * from renamed", + "is_view": "True" + }, + "externalUrl": null, + "name": "stg_payments", + "qualifiedName": null, + "description": null, + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "cypress_project.jaffle_shop.stg_payments", + "platform": "urn:li:dataPlatform:bigquery", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "payment_id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false + "fieldPath": "order_id", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Integer()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - 
"customProperties": { - "view_definition": "with source as (\n select * from `cypress_project`.`jaffle_shop`.`raw_payments`\n\n),\n\nrenamed as (\n\n select\n id as payment_id,\n order_id,\n payment_method,\n\n --`amount` is currently stored in cents, so we convert it to dollars\n amount / 100 as amount\n\n from source\n\n)\n\nselect * from renamed", - "is_view": "True" - }, - "externalUrl": null, - "name": "stg_payments", - "qualifiedName": null, - "description": null, - "uri": null, - "tags": [] + "fieldPath": "payment_method", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } + }, + "nativeDataType": "String()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null }, { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "cypress_project.jaffle_shop.stg_payments", - "platform": "urn:li:dataPlatform:bigquery", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "payment_id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "order_id", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, 
- "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Integer()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "payment_method", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "String()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "amount", - "jsonPath": null, - "nullable": true, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "Float()", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null + "fieldPath": "amount", + "jsonPath": null, + "nullable": true, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } + }, + "nativeDataType": "Float()", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null } - ] - } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + } + ] + } }, "proposedDelta": null, "systemMetadata": { - "lastObserved": 1655162356441, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162356441, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { 
"auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", @@ -1933,18 +1933,18 @@ "changeType": "UPSERT", "aspectName": "upstreamLineage", "aspect": { - "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_payments,PROD)\", \"type\": \"TRANSFORMED\"}]}", - "contentType": "application/json" + "value": "{\"upstreams\": [{\"auditStamp\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"dataset\": \"urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_payments,PROD)\", \"type\": \"TRANSFORMED\"}]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162356445, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162356445, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", @@ -1952,18 +1952,18 @@ "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { - "value": "{\"typeNames\": [\"view\"]}", - "contentType": "application/json" + "value": "{\"typeNames\": [\"view\"]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162356446, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162356446, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", @@ -1971,18 +1971,18 @@ "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { - "value": "{\"materialized\": false, \"viewLogic\": \"with source as (\\n select * from `cypress_project`.`jaffle_shop`.`raw_payments`\\n\\n),\\n\\nrenamed as (\\n\\n select\\n id as payment_id,\\n order_id,\\n payment_method,\\n\\n --`amount` is currently stored in cents, so we convert it to dollars\\n amount / 100 as amount\\n\\n from source\\n\\n)\\n\\nselect * from renamed\", \"viewLanguage\": \"SQL\"}", - "contentType": "application/json" + "value": "{\"materialized\": false, \"viewLogic\": \"with source as (\\n select * from `cypress_project`.`jaffle_shop`.`raw_payments`\\n\\n),\\n\\nrenamed as (\\n\\n select\\n id as payment_id,\\n order_id,\\n payment_method,\\n\\n --`amount` is currently stored in cents, so we convert it to dollars\\n amount / 100 as amount\\n\\n from source\\n\\n)\\n\\nselect * from renamed\", \"viewLanguage\": \"SQL\"}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162356446, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162356446, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", @@ -1990,18 +1990,18 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1655162357476, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 100, \"columnCount\": 7, \"fieldProfiles\": [{\"fieldPath\": \"customer_id\", \"uniqueCount\": 100, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, 
\"sampleValues\": [\"61\", \"74\", \"48\", \"75\", \"87\", \"14\", \"37\", \"55\", \"49\", \"78\", \"77\", \"10\", \"15\", \"60\", \"24\", \"45\", \"62\", \"98\", \"5\", \"97\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 79, \"uniqueProportion\": 0.79, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Timothy\", \"Harry\", \"Lillian\", \"Andrea\", \"Phillip\", \"Steve\", \"Shirley\", \"Nicholas\", \"Judy\", \"Harry\", \"Anne\", \"Henry\", \"Teresa\", \"Norma\", \"David\", \"Scott\", \"Elizabeth\", \"Nicole\", \"Katherine\", \"Shirley\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 19, \"uniqueProportion\": 0.19, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"R.\", \"A.\", \"C.\", \"H.\", \"B.\", \"F.\", \"J.\", \"R.\", \"N.\", \"H.\", \"W.\", \"W.\", \"H.\", \"W.\", \"G.\", \"B.\", \"P.\", \"M.\", \"R.\", \"D.\"]}, {\"fieldPath\": \"first_order\", \"uniqueCount\": 46, \"uniqueProportion\": 0.7419354838709677, \"nullCount\": 38, \"nullProportion\": 0.38, \"min\": \"2018-01-01\", \"max\": \"2018-04-07\", \"sampleValues\": [\"2018-03-01\", \"2018-03-23\", \"2018-02-26\", \"2018-01-17\", \"2018-02-04\", \"2018-03-23\", \"2018-03-16\", \"2018-03-03\", \"2018-01-24\", \"2018-02-19\", \"2018-01-18\", \"2018-04-07\", \"2018-02-02\", \"2018-04-07\", \"2018-02-13\", \"2018-01-23\", \"2018-02-06\", \"2018-01-09\", \"2018-02-16\", \"2018-02-17\"]}, {\"fieldPath\": \"most_recent_order\", \"uniqueCount\": 52, \"uniqueProportion\": 0.8387096774193549, \"nullCount\": 38, \"nullProportion\": 0.38, \"min\": \"2018-01-09\", \"max\": \"2018-04-09\", \"sampleValues\": [\"2018-03-01\", \"2018-03-23\", \"2018-02-26\", \"2018-01-17\", \"2018-02-04\", \"2018-03-23\", \"2018-03-16\", \"2018-03-03\", \"2018-01-24\", \"2018-02-19\", \"2018-01-18\", \"2018-04-07\", \"2018-02-02\", \"2018-04-07\", \"2018-02-13\", \"2018-01-23\", \"2018-02-06\", \"2018-01-09\", \"2018-02-16\", \"2018-02-17\"]}, {\"fieldPath\": \"number_of_orders\", \"uniqueCount\": 4, 
\"uniqueProportion\": 0.06451612903225806, \"nullCount\": 38, \"nullProportion\": 0.38, \"min\": \"1\", \"max\": \"5\", \"mean\": \"1.5967741935483863\", \"median\": \"1.0\", \"stdev\": \"0.7779687173818426\", \"sampleValues\": [\"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\"]}, {\"fieldPath\": \"customer_lifetime_value\", \"uniqueCount\": 35, \"uniqueProportion\": 0.5645161290322581, \"nullCount\": 38, \"nullProportion\": 0.38, \"min\": \"1.0\", \"max\": \"99.0\", \"mean\": \"26.967741935483883\", \"median\": \"26.5\", \"sampleValues\": [\"2.0\", \"2.0\", \"3.0\", \"3.0\", \"3.0\", \"3.0\", \"3.0\", \"4.0\", \"8.0\", \"8.0\", \"10.0\", \"10.0\", \"12.0\", \"14.0\", \"14.0\", \"15.0\", \"15.0\", \"16.0\", \"17.0\", \"18.0\"]}]}", - "contentType": "application/json" + "value": "{\"timestampMillis\": 1655162357476, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 100, \"columnCount\": 7, \"fieldProfiles\": [{\"fieldPath\": \"customer_id\", \"uniqueCount\": 100, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"61\", \"74\", \"48\", \"75\", \"87\", \"14\", \"37\", \"55\", \"49\", \"78\", \"77\", \"10\", \"15\", \"60\", \"24\", \"45\", \"62\", \"98\", \"5\", \"97\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 79, \"uniqueProportion\": 0.79, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Timothy\", \"Harry\", \"Lillian\", \"Andrea\", \"Phillip\", \"Steve\", \"Shirley\", \"Nicholas\", \"Judy\", \"Harry\", \"Anne\", \"Henry\", \"Teresa\", \"Norma\", \"David\", \"Scott\", \"Elizabeth\", \"Nicole\", \"Katherine\", \"Shirley\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 19, \"uniqueProportion\": 0.19, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"R.\", \"A.\", \"C.\", \"H.\", \"B.\", \"F.\", \"J.\", \"R.\", \"N.\", \"H.\", \"W.\", \"W.\", \"H.\", 
\"W.\", \"G.\", \"B.\", \"P.\", \"M.\", \"R.\", \"D.\"]}, {\"fieldPath\": \"first_order\", \"uniqueCount\": 46, \"uniqueProportion\": 0.7419354838709677, \"nullCount\": 38, \"nullProportion\": 0.38, \"min\": \"2018-01-01\", \"max\": \"2018-04-07\", \"sampleValues\": [\"2018-03-01\", \"2018-03-23\", \"2018-02-26\", \"2018-01-17\", \"2018-02-04\", \"2018-03-23\", \"2018-03-16\", \"2018-03-03\", \"2018-01-24\", \"2018-02-19\", \"2018-01-18\", \"2018-04-07\", \"2018-02-02\", \"2018-04-07\", \"2018-02-13\", \"2018-01-23\", \"2018-02-06\", \"2018-01-09\", \"2018-02-16\", \"2018-02-17\"]}, {\"fieldPath\": \"most_recent_order\", \"uniqueCount\": 52, \"uniqueProportion\": 0.8387096774193549, \"nullCount\": 38, \"nullProportion\": 0.38, \"min\": \"2018-01-09\", \"max\": \"2018-04-09\", \"sampleValues\": [\"2018-03-01\", \"2018-03-23\", \"2018-02-26\", \"2018-01-17\", \"2018-02-04\", \"2018-03-23\", \"2018-03-16\", \"2018-03-03\", \"2018-01-24\", \"2018-02-19\", \"2018-01-18\", \"2018-04-07\", \"2018-02-02\", \"2018-04-07\", \"2018-02-13\", \"2018-01-23\", \"2018-02-06\", \"2018-01-09\", \"2018-02-16\", \"2018-02-17\"]}, {\"fieldPath\": \"number_of_orders\", \"uniqueCount\": 4, \"uniqueProportion\": 0.06451612903225806, \"nullCount\": 38, \"nullProportion\": 0.38, \"min\": \"1\", \"max\": \"5\", \"mean\": \"1.5967741935483863\", \"median\": \"1.0\", \"stdev\": \"0.7779687173818426\", \"sampleValues\": [\"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\", \"1\"]}, {\"fieldPath\": \"customer_lifetime_value\", \"uniqueCount\": 35, \"uniqueProportion\": 0.5645161290322581, \"nullCount\": 38, \"nullProportion\": 0.38, \"min\": \"1.0\", \"max\": \"99.0\", \"mean\": \"26.967741935483883\", \"median\": \"26.5\", \"sampleValues\": [\"2.0\", \"2.0\", \"3.0\", \"3.0\", \"3.0\", \"3.0\", \"3.0\", \"4.0\", \"8.0\", \"8.0\", \"10.0\", \"10.0\", \"12.0\", \"14.0\", \"14.0\", \"15.0\", \"15.0\", \"16.0\", 
\"17.0\", \"18.0\"]}]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162378272, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162378272, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers_source,PROD)", @@ -2009,18 +2009,18 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1655162357619, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"source_name\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"siour\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}", - "contentType": "application/json" + "value": "{\"timestampMillis\": 1655162357619, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 0, \"columnCount\": 2, \"fieldProfiles\": [{\"fieldPath\": \"source_name\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}, {\"fieldPath\": \"siour\", \"uniqueCount\": 0, \"nullCount\": 0, \"sampleValues\": []}]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162378286, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162378286, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.orders,PROD)", @@ -2028,18 +2028,18 @@ "changeType": 
"UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1655162357642, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 99, \"columnCount\": 9, \"fieldProfiles\": [{\"fieldPath\": \"order_id\", \"uniqueCount\": 99, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"86\", \"4\", \"9\", \"44\", \"24\", \"3\", \"62\", \"95\", \"81\", \"65\", \"94\", \"42\", \"19\", \"23\", \"58\", \"59\", \"76\", \"43\", \"93\", \"15\"]}, {\"fieldPath\": \"customer_id\", \"uniqueCount\": 62, \"uniqueProportion\": 0.6262626262626263, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"99\", \"mean\": \"48.25252525252523\", \"median\": \"50\", \"stdev\": \"27.781341350472964\", \"sampleValues\": [\"68\", \"50\", \"53\", \"66\", \"3\", \"94\", \"57\", \"27\", \"76\", \"26\", \"63\", \"92\", \"54\", \"22\", \"22\", \"30\", \"25\", \"31\", \"66\", \"25\"]}, {\"fieldPath\": \"order_date\", \"uniqueCount\": 69, \"uniqueProportion\": 0.696969696969697, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2018-01-01\", \"max\": \"2018-04-09\", \"sampleValues\": [\"2018-03-26\", \"2018-01-05\", \"2018-01-12\", \"2018-02-17\", \"2018-01-27\", \"2018-01-04\", \"2018-03-05\", \"2018-04-04\", \"2018-03-23\", \"2018-03-08\", \"2018-04-03\", \"2018-02-16\", \"2018-01-22\", \"2018-01-26\", \"2018-03-01\", \"2018-03-02\", \"2018-03-20\", \"2018-02-17\", \"2018-04-03\", \"2018-01-17\"]}, {\"fieldPath\": \"status\", \"uniqueCount\": 5, \"uniqueProportion\": 0.050505050505050504, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"placed\", \"completed\", \"completed\", \"completed\", \"completed\", \"completed\", \"completed\", \"placed\", \"shipped\", \"completed\", \"placed\", \"completed\", \"completed\", \"return_pending\", \"completed\", \"completed\", \"completed\", \"completed\", \"placed\", \"completed\"]}, {\"fieldPath\": 
\"credit_card_amount\", \"uniqueCount\": 25, \"uniqueProportion\": 0.25252525252525254, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0.0\", \"max\": \"30.0\", \"mean\": \"8.797979797979806\", \"median\": \"0.0\", \"sampleValues\": [\"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\"]}, {\"fieldPath\": \"coupon_amount\", \"uniqueCount\": 12, \"uniqueProportion\": 0.12121212121212122, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0.0\", \"max\": \"26.0\", \"mean\": \"1.8686868686868698\", \"median\": \"0.0\", \"sampleValues\": [\"23.0\", \"25.0\", \"0.0\", \"0.0\", \"26.0\", \"1.0\", \"0.0\", \"24.0\", \"2.0\", \"0.0\", \"7.0\", \"17.0\", \"0.0\", \"0.0\", \"18.0\", \"0.0\", \"2.0\", \"0.0\", \"0.0\", \"22.0\"]}, {\"fieldPath\": \"bank_transfer_amount\", \"uniqueCount\": 19, \"uniqueProportion\": 0.1919191919191919, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0.0\", \"max\": \"26.0\", \"mean\": \"4.151515151515151\", \"median\": \"0.0\", \"sampleValues\": [\"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\"]}, {\"fieldPath\": \"gift_card_amount\", \"uniqueCount\": 11, \"uniqueProportion\": 0.1111111111111111, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0.0\", \"max\": \"30.0\", \"mean\": \"2.07070707070707\", \"median\": \"0.0\", \"sampleValues\": [\"0.0\", \"0.0\", \"23.0\", \"11.0\", \"0.0\", \"0.0\", \"14.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"6.0\", \"23.0\", \"6.0\", \"28.0\", \"0.0\", \"18.0\", \"26.0\", \"0.0\"]}, {\"fieldPath\": \"amount\", \"uniqueCount\": 32, \"uniqueProportion\": 0.32323232323232326, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0.0\", \"max\": \"58.0\", \"mean\": \"16.888888888888882\", \"median\": \"17.0\", \"sampleValues\": 
[\"23.0\", \"25.0\", \"23.0\", \"11.0\", \"26.0\", \"1.0\", \"14.0\", \"24.0\", \"2.0\", \"0.0\", \"7.0\", \"17.0\", \"6.0\", \"23.0\", \"24.0\", \"28.0\", \"2.0\", \"18.0\", \"26.0\", \"22.0\"]}]}", - "contentType": "application/json" + "value": "{\"timestampMillis\": 1655162357642, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 99, \"columnCount\": 9, \"fieldProfiles\": [{\"fieldPath\": \"order_id\", \"uniqueCount\": 99, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"86\", \"4\", \"9\", \"44\", \"24\", \"3\", \"62\", \"95\", \"81\", \"65\", \"94\", \"42\", \"19\", \"23\", \"58\", \"59\", \"76\", \"43\", \"93\", \"15\"]}, {\"fieldPath\": \"customer_id\", \"uniqueCount\": 62, \"uniqueProportion\": 0.6262626262626263, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"99\", \"mean\": \"48.25252525252523\", \"median\": \"50\", \"stdev\": \"27.781341350472964\", \"sampleValues\": [\"68\", \"50\", \"53\", \"66\", \"3\", \"94\", \"57\", \"27\", \"76\", \"26\", \"63\", \"92\", \"54\", \"22\", \"22\", \"30\", \"25\", \"31\", \"66\", \"25\"]}, {\"fieldPath\": \"order_date\", \"uniqueCount\": 69, \"uniqueProportion\": 0.696969696969697, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2018-01-01\", \"max\": \"2018-04-09\", \"sampleValues\": [\"2018-03-26\", \"2018-01-05\", \"2018-01-12\", \"2018-02-17\", \"2018-01-27\", \"2018-01-04\", \"2018-03-05\", \"2018-04-04\", \"2018-03-23\", \"2018-03-08\", \"2018-04-03\", \"2018-02-16\", \"2018-01-22\", \"2018-01-26\", \"2018-03-01\", \"2018-03-02\", \"2018-03-20\", \"2018-02-17\", \"2018-04-03\", \"2018-01-17\"]}, {\"fieldPath\": \"status\", \"uniqueCount\": 5, \"uniqueProportion\": 0.050505050505050504, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"placed\", \"completed\", \"completed\", \"completed\", \"completed\", \"completed\", \"completed\", \"placed\", \"shipped\", \"completed\", 
\"placed\", \"completed\", \"completed\", \"return_pending\", \"completed\", \"completed\", \"completed\", \"completed\", \"placed\", \"completed\"]}, {\"fieldPath\": \"credit_card_amount\", \"uniqueCount\": 25, \"uniqueProportion\": 0.25252525252525254, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0.0\", \"max\": \"30.0\", \"mean\": \"8.797979797979806\", \"median\": \"0.0\", \"sampleValues\": [\"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\"]}, {\"fieldPath\": \"coupon_amount\", \"uniqueCount\": 12, \"uniqueProportion\": 0.12121212121212122, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0.0\", \"max\": \"26.0\", \"mean\": \"1.8686868686868698\", \"median\": \"0.0\", \"sampleValues\": [\"23.0\", \"25.0\", \"0.0\", \"0.0\", \"26.0\", \"1.0\", \"0.0\", \"24.0\", \"2.0\", \"0.0\", \"7.0\", \"17.0\", \"0.0\", \"0.0\", \"18.0\", \"0.0\", \"2.0\", \"0.0\", \"0.0\", \"22.0\"]}, {\"fieldPath\": \"bank_transfer_amount\", \"uniqueCount\": 19, \"uniqueProportion\": 0.1919191919191919, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0.0\", \"max\": \"26.0\", \"mean\": \"4.151515151515151\", \"median\": \"0.0\", \"sampleValues\": [\"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\"]}, {\"fieldPath\": \"gift_card_amount\", \"uniqueCount\": 11, \"uniqueProportion\": 0.1111111111111111, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0.0\", \"max\": \"30.0\", \"mean\": \"2.07070707070707\", \"median\": \"0.0\", \"sampleValues\": [\"0.0\", \"0.0\", \"23.0\", \"11.0\", \"0.0\", \"0.0\", \"14.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"0.0\", \"6.0\", \"23.0\", \"6.0\", \"28.0\", \"0.0\", \"18.0\", \"26.0\", \"0.0\"]}, {\"fieldPath\": \"amount\", \"uniqueCount\": 32, \"uniqueProportion\": 
0.32323232323232326, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0.0\", \"max\": \"58.0\", \"mean\": \"16.888888888888882\", \"median\": \"17.0\", \"sampleValues\": [\"23.0\", \"25.0\", \"23.0\", \"11.0\", \"26.0\", \"1.0\", \"14.0\", \"24.0\", \"2.0\", \"0.0\", \"7.0\", \"17.0\", \"6.0\", \"23.0\", \"24.0\", \"28.0\", \"2.0\", \"18.0\", \"26.0\", \"22.0\"]}]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162388123, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162388123, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_customers,PROD)", @@ -2047,18 +2047,18 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1655162357386, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 100, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 100, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"20\", \"23\", \"40\", \"59\", \"74\", \"96\", \"27\", \"45\", \"53\", \"73\", \"87\", \"4\", \"41\", \"46\", \"48\", \"64\", \"71\", \"86\", \"12\", \"82\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 79, \"uniqueProportion\": 0.79, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Mildred\", \"Maria\", \"Adam\", \"Harry\", \"Jacqueline\", \"Benjamin\", \"Scott\", \"Anne\", \"Alan\", \"Phillip\", \"Jimmy\", \"Gloria\", \"Norma\", \"Lillian\", \"David\", \"Gerald\", \"Jason\", \"Amy\", \"Arthur\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 19, \"uniqueProportion\": 0.19, \"nullCount\": 0, \"nullProportion\": 0.0, 
\"sampleValues\": [\"A.\", \"A.\", \"A.\", \"A.\", \"A.\", \"A.\", \"B.\", \"B.\", \"B.\", \"B.\", \"B.\", \"C.\", \"C.\", \"C.\", \"C.\", \"C.\", \"C.\", \"C.\", \"D.\", \"D.\"]}]}", - "contentType": "application/json" + "value": "{\"timestampMillis\": 1655162357386, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 100, \"columnCount\": 3, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 100, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"20\", \"23\", \"40\", \"59\", \"74\", \"96\", \"27\", \"45\", \"53\", \"73\", \"87\", \"4\", \"41\", \"46\", \"48\", \"64\", \"71\", \"86\", \"12\", \"82\"]}, {\"fieldPath\": \"first_name\", \"uniqueCount\": 79, \"uniqueProportion\": 0.79, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"Anna\", \"Mildred\", \"Maria\", \"Adam\", \"Harry\", \"Jacqueline\", \"Benjamin\", \"Scott\", \"Anne\", \"Alan\", \"Phillip\", \"Jimmy\", \"Gloria\", \"Norma\", \"Lillian\", \"David\", \"Gerald\", \"Jason\", \"Amy\", \"Arthur\"]}, {\"fieldPath\": \"last_name\", \"uniqueCount\": 19, \"uniqueProportion\": 0.19, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"A.\", \"A.\", \"A.\", \"A.\", \"A.\", \"A.\", \"B.\", \"B.\", \"B.\", \"B.\", \"B.\", \"C.\", \"C.\", \"C.\", \"C.\", \"C.\", \"C.\", \"C.\", \"D.\", \"D.\"]}]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162388138, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162388138, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_orders,PROD)", @@ -2066,18 +2066,18 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", 
"aspect": { - "value": "{\"timestampMillis\": 1655162357622, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 99, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 99, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"84\", \"86\", \"87\", \"89\", \"91\", \"92\", \"93\", \"94\", \"95\", \"96\", \"97\", \"98\", \"99\", \"71\", \"72\", \"74\", \"77\", \"78\", \"79\", \"80\"]}, {\"fieldPath\": \"user_id\", \"uniqueCount\": 62, \"uniqueProportion\": 0.6262626262626263, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"99\", \"mean\": \"48.252525252525245\", \"median\": \"50\", \"stdev\": \"27.781341350472957\", \"sampleValues\": [\"70\", \"68\", \"46\", \"21\", \"47\", \"84\", \"66\", \"63\", \"27\", \"90\", \"89\", \"41\", \"85\", \"42\", \"30\", \"9\", \"35\", \"90\", \"52\", \"11\"]}, {\"fieldPath\": \"order_date\", \"uniqueCount\": 69, \"uniqueProportion\": 0.696969696969697, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2018-01-01\", \"max\": \"2018-04-09\", \"sampleValues\": [\"2018-03-26\", \"2018-03-26\", \"2018-03-27\", \"2018-03-28\", \"2018-03-31\", \"2018-04-02\", \"2018-04-03\", \"2018-04-03\", \"2018-04-04\", \"2018-04-06\", \"2018-04-07\", \"2018-04-07\", \"2018-04-09\", \"2018-03-12\", \"2018-03-14\", \"2018-03-17\", \"2018-03-21\", \"2018-03-23\", \"2018-03-23\", \"2018-03-23\"]}, {\"fieldPath\": \"status\", \"uniqueCount\": 5, \"uniqueProportion\": 0.050505050505050504, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"shipped\", \"shipped\", \"shipped\", \"shipped\", \"shipped\", \"shipped\", \"shipped\"]}]}", - "contentType": "application/json" + "value": "{\"timestampMillis\": 1655162357622, \"partitionSpec\": {\"type\": 
\"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 99, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 99, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"84\", \"86\", \"87\", \"89\", \"91\", \"92\", \"93\", \"94\", \"95\", \"96\", \"97\", \"98\", \"99\", \"71\", \"72\", \"74\", \"77\", \"78\", \"79\", \"80\"]}, {\"fieldPath\": \"user_id\", \"uniqueCount\": 62, \"uniqueProportion\": 0.6262626262626263, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"99\", \"mean\": \"48.252525252525245\", \"median\": \"50\", \"stdev\": \"27.781341350472957\", \"sampleValues\": [\"70\", \"68\", \"46\", \"21\", \"47\", \"84\", \"66\", \"63\", \"27\", \"90\", \"89\", \"41\", \"85\", \"42\", \"30\", \"9\", \"35\", \"90\", \"52\", \"11\"]}, {\"fieldPath\": \"order_date\", \"uniqueCount\": 69, \"uniqueProportion\": 0.696969696969697, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"2018-01-01\", \"max\": \"2018-04-09\", \"sampleValues\": [\"2018-03-26\", \"2018-03-26\", \"2018-03-27\", \"2018-03-28\", \"2018-03-31\", \"2018-04-02\", \"2018-04-03\", \"2018-04-03\", \"2018-04-04\", \"2018-04-06\", \"2018-04-07\", \"2018-04-07\", \"2018-04-09\", \"2018-03-12\", \"2018-03-14\", \"2018-03-17\", \"2018-03-21\", \"2018-03-23\", \"2018-03-23\", \"2018-03-23\"]}, {\"fieldPath\": \"status\", \"uniqueCount\": 5, \"uniqueProportion\": 0.050505050505050504, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"placed\", \"shipped\", \"shipped\", \"shipped\", \"shipped\", \"shipped\", \"shipped\", \"shipped\"]}]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162388145, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + 
"lastObserved": 1655162388145, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, -{ + }, + { "auditHeader": null, "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_payments,PROD)", @@ -2085,2171 +2085,2171 @@ "changeType": "UPSERT", "aspectName": "datasetProfile", "aspect": { - "value": "{\"timestampMillis\": 1655162357609, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 113, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 113, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"66\", \"27\", \"30\", \"109\", \"3\", \"17\", \"47\", \"108\", \"4\", \"86\", \"93\", \"106\", \"98\", \"48\", \"107\", \"92\", \"49\", \"22\", \"67\", \"71\"]}, {\"fieldPath\": \"order_id\", \"uniqueCount\": 99, \"uniqueProportion\": 0.8761061946902655, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"99\", \"mean\": \"50.03539823008851\", \"median\": \"51\", \"stdev\": \"28.54317819535489\", \"sampleValues\": [\"58\", \"24\", \"25\", \"95\", \"3\", \"15\", \"42\", \"94\", \"4\", \"76\", \"81\", \"92\", \"86\", \"43\", \"93\", \"80\", \"44\", \"19\", \"58\", \"62\"]}, {\"fieldPath\": \"payment_method\", \"uniqueCount\": 4, \"uniqueProportion\": 0.035398230088495575, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"gift_card\", \"gift_card\", \"gift_card\", \"gift_card\", \"gift_card\", \"gift_card\", \"gift_card\"]}, {\"fieldPath\": \"amount\", \"uniqueCount\": 30, \"uniqueProportion\": 0.26548672566371684, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0\", \"max\": \"3000\", \"mean\": \"1479.6460176991145\", \"median\": \"1500\", 
\"stdev\": \"919.836873351873\", \"sampleValues\": [\"1800\", \"2600\", \"1600\", \"2400\", \"100\", \"2200\", \"1700\", \"700\", \"2500\", \"200\", \"200\", \"200\", \"2300\", \"1800\", \"2600\", \"300\", \"1100\", \"600\", \"600\", \"1400\"]}]}", - "contentType": "application/json" + "value": "{\"timestampMillis\": 1655162357609, \"partitionSpec\": {\"type\": \"FULL_TABLE\", \"partition\": \"FULL_TABLE_SNAPSHOT\"}, \"rowCount\": 113, \"columnCount\": 4, \"fieldProfiles\": [{\"fieldPath\": \"id\", \"uniqueCount\": 113, \"uniqueProportion\": 1.0, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"66\", \"27\", \"30\", \"109\", \"3\", \"17\", \"47\", \"108\", \"4\", \"86\", \"93\", \"106\", \"98\", \"48\", \"107\", \"92\", \"49\", \"22\", \"67\", \"71\"]}, {\"fieldPath\": \"order_id\", \"uniqueCount\": 99, \"uniqueProportion\": 0.8761061946902655, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"1\", \"max\": \"99\", \"mean\": \"50.03539823008851\", \"median\": \"51\", \"stdev\": \"28.54317819535489\", \"sampleValues\": [\"58\", \"24\", \"25\", \"95\", \"3\", \"15\", \"42\", \"94\", \"4\", \"76\", \"81\", \"92\", \"86\", \"43\", \"93\", \"80\", \"44\", \"19\", \"58\", \"62\"]}, {\"fieldPath\": \"payment_method\", \"uniqueCount\": 4, \"uniqueProportion\": 0.035398230088495575, \"nullCount\": 0, \"nullProportion\": 0.0, \"sampleValues\": [\"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"coupon\", \"gift_card\", \"gift_card\", \"gift_card\", \"gift_card\", \"gift_card\", \"gift_card\", \"gift_card\"]}, {\"fieldPath\": \"amount\", \"uniqueCount\": 30, \"uniqueProportion\": 0.26548672566371684, \"nullCount\": 0, \"nullProportion\": 0.0, \"min\": \"0\", \"max\": \"3000\", \"mean\": \"1479.6460176991145\", \"median\": \"1500\", \"stdev\": \"919.836873351873\", \"sampleValues\": [\"1800\", \"2600\", \"1600\", \"2400\", \"100\", \"2200\", \"1700\", \"700\", 
\"2500\", \"200\", \"200\", \"200\", \"2300\", \"1800\", \"2600\", \"300\", \"1100\", \"600\", \"600\", \"1400\"]}]}", + "contentType": "application/json" }, "systemMetadata": { - "lastObserved": 1655162388150, - "runId": "bigquery-2022_06_13-16_18_59", - "registryName": null, - "registryVersion": null, - "properties": null + "lastObserved": 1655162388150, + "runId": "bigquery-2022_06_13-16_18_59", + "registryName": null, + "registryVersion": null, + "properties": null } -}, - { - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.orders,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"table\", \"view\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1655162322398, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + }, + { + "auditHeader": null, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.orders,PROD)", + "entityKeyAspect": null, + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\", \"view\"]}", + "contentType": "application/json" }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.orders,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "node_type": "model", - "materialization": "table", - "dbt_file_path": "models/orders.sql", - "catalog_type": "table", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", - "manifest_version": "1.0.4", - "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.0.4" - }, - "externalUrl": null, 
- "name": "orders", - "qualifiedName": null, - "description": "This table has basic information about orders, as well as some derived facts based on payments", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "model.jaffle_shop.orders", - "platform": "urn:li:dataPlatform:dbt", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "order_id", - "jsonPath": null, - "nullable": false, - "description": "This is a unique identifier for an order", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "customer_id", - "jsonPath": null, - "nullable": false, - "description": "Foreign key to the customers table", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "order_date", - "jsonPath": null, - "nullable": false, - "description": "Date (UTC) that the order was placed", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE", - "recursive": false, - 
"globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "status", - "jsonPath": null, - "nullable": false, - "description": "Orders can be one of the following statuses:\n\n| status | description |\n|----------------|------------------------------------------------------------------------------------------------------------------------|\n| placed | The order has been placed but has not yet left the warehouse |\n| shipped | The order has ben shipped to the customer and is currently in transit |\n| completed | The order has been received by the customer |\n| return_pending | The customer has indicated that they would like to return the order, but it has not yet been received at the warehouse |\n| returned | The order has been returned by the customer and received at the warehouse |", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "credit_card_amount", - "jsonPath": null, - "nullable": false, - "description": "Amount of the order (AUD) paid for by credit card", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "FLOAT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "coupon_amount", - "jsonPath": null, - "nullable": false, - "description": "Amount of the order (AUD) paid for by coupon", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "FLOAT64", - "recursive": false, - "globalTags": null, - 
"glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "bank_transfer_amount", - "jsonPath": null, - "nullable": false, - "description": "Amount of the order (AUD) paid for by bank transfer", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "FLOAT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "gift_card_amount", - "jsonPath": null, - "nullable": false, - "description": "Amount of the order (AUD) paid for by gift card", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "FLOAT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "amount", - "jsonPath": null, - "nullable": false, - "description": "Total amount (AUD) of the order", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "FLOAT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", - "type": "TRANSFORMED" - }, - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.ViewProperties": { - "materialized": true, - "viewLogic": "{% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %}\n\nwith orders as (\n\n select * from {{ ref('stg_orders') }}\n\n),\n\npayments as (\n\n select * from {{ ref('stg_payments') }}\n\n),\n\norder_payments as (\n\n select\n order_id,\n\n {% for payment_method in payment_methods -%}\n sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount,\n {% endfor -%}\n\n sum(amount) as total_amount\n\n from payments\n\n group by 1\n\n),\n\nfinal as (\n\n select\n orders.order_id,\n orders.customer_id,\n orders.order_date,\n orders.status,\n\n {% for payment_method in payment_methods -%}\n\n order_payments.{{ payment_method }}_amount,\n\n {% endfor -%}\n\n order_payments.total_amount as amount\n\n from orders\n\n left join order_payments using (order_id)\n\n)\n\nselect * from final", - "viewLanguage": "SQL" - } + "systemMetadata": { + "lastObserved": 1655162322398, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.orders,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "table", + "dbt_file_path": "models/orders.sql", + "catalog_type": "table", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", + "manifest_version": "1.0.4", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.0.4" + }, + "externalUrl": null, 
+ "name": "orders", + "qualifiedName": null, + "description": "This table has basic information about orders, as well as some derived facts based on payments", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "model.jaffle_shop.orders", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "order_id", + "jsonPath": null, + "nullable": false, + "description": "This is a unique identifier for an order", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "customer_id", + "jsonPath": null, + "nullable": false, + "description": "Foreign key to the customers table", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "order_date", + "jsonPath": null, + "nullable": false, + "description": "Date (UTC) that the order was placed", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE", + "recursive": false, + 
"globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "status", + "jsonPath": null, + "nullable": false, + "description": "Orders can be one of the following statuses:\n\n| status | description |\n|----------------|------------------------------------------------------------------------------------------------------------------------|\n| placed | The order has been placed but has not yet left the warehouse |\n| shipped | The order has ben shipped to the customer and is currently in transit |\n| completed | The order has been received by the customer |\n| return_pending | The customer has indicated that they would like to return the order, but it has not yet been received at the warehouse |\n| returned | The order has been returned by the customer and received at the warehouse |", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "credit_card_amount", + "jsonPath": null, + "nullable": false, + "description": "Amount of the order (AUD) paid for by credit card", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "FLOAT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "coupon_amount", + "jsonPath": null, + "nullable": false, + "description": "Amount of the order (AUD) paid for by coupon", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "FLOAT64", + "recursive": false, + "globalTags": null, + 
"glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "bank_transfer_amount", + "jsonPath": null, + "nullable": false, + "description": "Amount of the order (AUD) paid for by bank transfer", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } - ] + }, + "nativeDataType": "FLOAT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "gift_card_amount", + "jsonPath": null, + "nullable": false, + "description": "Amount of the order (AUD) paid for by gift card", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "FLOAT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "amount", + "jsonPath": null, + "nullable": false, + "description": "Total amount (AUD) of the order", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "FLOAT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322399, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": 
"urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": true, + "viewLogic": "{% set payment_methods = ['credit_card', 'coupon', 'bank_transfer', 'gift_card'] %}\n\nwith orders as (\n\n select * from {{ ref('stg_orders') }}\n\n),\n\npayments as (\n\n select * from {{ ref('stg_payments') }}\n\n),\n\norder_payments as (\n\n select\n order_id,\n\n {% for payment_method in payment_methods -%}\n sum(case when payment_method = '{{ payment_method }}' then amount else 0 end) as {{ payment_method }}_amount,\n {% endfor -%}\n\n sum(amount) as total_amount\n\n from payments\n\n group by 1\n\n),\n\nfinal as (\n\n select\n orders.order_id,\n orders.customer_id,\n orders.order_date,\n orders.status,\n\n {% for payment_method in payment_methods -%}\n\n order_payments.{{ payment_method }}_amount,\n\n {% endfor -%}\n\n order_payments.total_amount as amount\n\n from orders\n\n left join order_payments using (order_id)\n\n)\n\nselect * from final", + "viewLanguage": "SQL" + } + } + ] + } }, - { - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_customers,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"view\", \"view\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1655162322404, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 
1655162322399, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_customers,PROD)", + "entityKeyAspect": null, + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"view\", \"view\"]}", + "contentType": "application/json" }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_customers,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "node_type": "model", - "materialization": "view", - "dbt_file_path": "models/staging/stg_customers.sql", - "catalog_type": "view", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", - "manifest_version": "1.0.4", - "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.0.4" - }, - "externalUrl": null, - "name": "stg_customers", - "qualifiedName": null, - "description": "", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "model.jaffle_shop.stg_customers", - "platform": "urn:li:dataPlatform:dbt", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "customer_id", - "jsonPath": null, - "nullable": false, - "description": null, - 
"created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "first_name", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "last_name", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.ViewProperties": { - "materialized": false, - "viewLogic": "with source as (\n\n {#-\n Normally we would select from the table here, but we are using seeds to load\n our data in this project\n #}\n select * from {{ ref('raw_customers') }}\n\n),\n\nrenamed as (\n\n select\n id as customer_id,\n first_name,\n last_name\n\n from source\n\n)\n\nselect * from renamed", - "viewLanguage": "SQL" - } + 
"systemMetadata": { + "lastObserved": 1655162322404, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "view", + "dbt_file_path": "models/staging/stg_customers.sql", + "catalog_type": "view", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", + "manifest_version": "1.0.4", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.0.4" + }, + "externalUrl": null, + "name": "stg_customers", + "qualifiedName": null, + "description": "", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "model.jaffle_shop.stg_customers", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "customer_id", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + 
"fieldPath": "first_name", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "last_name", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } - ] + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322405, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": false, + "viewLogic": "with source as (\n\n {#-\n Normally we would select from the table here, but we are using seeds to load\n our data in this project\n #}\n select * from {{ ref('raw_customers') }}\n\n),\n\nrenamed as (\n\n select\n id as customer_id,\n first_name,\n last_name\n\n from source\n\n)\n\nselect * from renamed", + "viewLanguage": "SQL" + } + } + ] + } }, - { - "auditHeader": null, - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_payments,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"view\", \"view\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1655162322409, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322405, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_payments,PROD)", + "entityKeyAspect": null, + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"view\", \"view\"]}", + "contentType": "application/json" }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_payments,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "node_type": "model", - "materialization": "view", - "dbt_file_path": "models/staging/stg_payments.sql", - "catalog_type": "view", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", - "manifest_version": "1.0.4", - "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.0.4" - }, - "externalUrl": null, - "name": "stg_payments", - "qualifiedName": null, - "description": "", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "model.jaffle_shop.stg_payments", - "platform": 
"urn:li:dataPlatform:dbt", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "payment_id", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "order_id", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "payment_method", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "amount", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "FLOAT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": 
null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_payments,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.ViewProperties": { - "materialized": false, - "viewLogic": "with source as (\n \n {#-\n Normally we would select from the table here, but we are using seeds to load\n our data in this project\n #}\n select * from {{ ref('raw_payments') }}\n\n),\n\nrenamed as (\n\n select\n id as payment_id,\n order_id,\n payment_method,\n\n --`amount` is currently stored in cents, so we convert it to dollars\n amount / 100 as amount\n\n from source\n\n)\n\nselect * from renamed", - "viewLanguage": "SQL" - } + "systemMetadata": { + "lastObserved": 1655162322409, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_payments,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "view", + "dbt_file_path": "models/staging/stg_payments.sql", + "catalog_type": "view", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", + "manifest_version": "1.0.4", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.0.4" + }, + "externalUrl": null, + "name": "stg_payments", + "qualifiedName": null, + "description": "", + "uri": null, + "tags": [] + } + }, + { + 
"com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "model.jaffle_shop.stg_payments", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "payment_id", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "order_id", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "payment_method", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "amount", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} } - ] + }, + "nativeDataType": "FLOAT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_payments,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": false, + "viewLogic": "with source as (\n \n {#-\n Normally we would select from the table here, but we are using seeds to load\n our data in this project\n #}\n select * from {{ ref('raw_payments') }}\n\n),\n\nrenamed as (\n\n select\n id as payment_id,\n order_id,\n payment_method,\n\n --`amount` is currently stored in cents, so we convert it to dollars\n amount / 100 as amount\n\n from source\n\n)\n\nselect * from renamed", + "viewLanguage": "SQL" } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322410, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"view\", \"view\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1655162322414, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + "proposedDelta": null, + "systemMetadata": { + 
"lastObserved": 1655162322410, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)", + "entityKeyAspect": null, + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"view\", \"view\"]}", + "contentType": "application/json" }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "node_type": "model", - "materialization": "view", - "dbt_file_path": "models/staging/stg_orders.sql", - "catalog_type": "view", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", - "manifest_version": "1.0.4", - "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.0.4" - }, - "externalUrl": null, - "name": "stg_orders", - "qualifiedName": null, - "description": "", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "model.jaffle_shop.stg_orders", - "platform": "urn:li:dataPlatform:dbt", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "order_id", - "jsonPath": null, - "nullable": false, - "description": null, - 
"created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "customer_id", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "order_date", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "status", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_orders,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.ViewProperties": { - 
"materialized": false, - "viewLogic": "with source as (\n\n {#-\n Normally we would select from the table here, but we are using seeds to load\n our data in this project\n #}\n select * from {{ ref('raw_orders') }}\n\n),\n\nrenamed as (\n\n select\n id as order_id,\n user_id as customer_id,\n order_date,\n status\n\n from source\n\n)\n\nselect * from renamed", - "viewLanguage": "SQL" - } + "systemMetadata": { + "lastObserved": 1655162322414, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "view", + "dbt_file_path": "models/staging/stg_orders.sql", + "catalog_type": "view", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", + "manifest_version": "1.0.4", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.0.4" + }, + "externalUrl": null, + "name": "stg_orders", + "qualifiedName": null, + "description": "", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "model.jaffle_shop.stg_orders", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": 
"order_id", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "customer_id", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "order_date", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "status", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } - ] + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_orders,PROD)", + "type": "TRANSFORMED" + } + ], + 
"fineGrainedLineages": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322415, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": false, + "viewLogic": "with source as (\n\n {#-\n Normally we would select from the table here, but we are using seeds to load\n our data in this project\n #}\n select * from {{ ref('raw_orders') }}\n\n),\n\nrenamed as (\n\n select\n id as order_id,\n user_id as customer_id,\n order_date,\n status\n\n from source\n\n)\n\nselect * from renamed", + "viewLanguage": "SQL" + } + } + ] + } }, - { - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.transformers_customers,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"ephemeral\", \"view\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1655162322419, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322415, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.transformers_customers,PROD)", + "entityKeyAspect": null, + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"ephemeral\", \"view\"]}", + "contentType": "application/json" }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": 
"urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.transformers_customers,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "node_type": "model", - "materialization": "ephemeral", - "dbt_file_path": "models/transformers/transformers_customers.sql", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", - "manifest_version": "1.0.4", - "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.0.4" - }, - "externalUrl": null, - "name": "transformers_customers", - "qualifiedName": null, - "description": "", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "model.jaffle_shop.transformers_customers", - "platform": "urn:li:dataPlatform:dbt", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.ViewProperties": { - "materialized": false, - "viewLogic": "with source as (\n\n {#-\n Normally we would select from the table here, but we are using seeds to load\n our data in this project\n #}\n select * 
from {{ ref('raw_customers') }}\n\n),\n\nrenamed as (\n\n select\n id as customer_id,\n first_name,\n last_name\n\n from source\n\n)\n\nselect * from renamed", - "viewLanguage": "SQL" - } - } - ] + "systemMetadata": { + "lastObserved": 1655162322419, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.transformers_customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "ephemeral", + "dbt_file_path": "models/transformers/transformers_customers.sql", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", + "manifest_version": "1.0.4", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.0.4" + }, + "externalUrl": null, + "name": "transformers_customers", + "qualifiedName": null, + "description": "", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "model.jaffle_shop.transformers_customers", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322420, - "runId": 
"dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": false, + "viewLogic": "with source as (\n\n {#-\n Normally we would select from the table here, but we are using seeds to load\n our data in this project\n #}\n select * from {{ ref('raw_customers') }}\n\n),\n\nrenamed as (\n\n select\n id as customer_id,\n first_name,\n last_name\n\n from source\n\n)\n\nselect * from renamed", + "viewLanguage": "SQL" + } + } + ] + } }, - { - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"seed\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1655162322423, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322420, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", + "entityKeyAspect": null, + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"seed\"]}", + "contentType": "application/json" }, - { - "auditHeader": null, - 
"proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "node_type": "seed", - "materialization": "seed", - "dbt_file_path": "data/raw_customers.csv", - "catalog_type": "table", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", - "manifest_version": "1.0.4", - "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.0.4" - }, - "externalUrl": null, - "name": "raw_customers", - "qualifiedName": null, - "description": "", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "seed.jaffle_shop.raw_customers", - "platform": "urn:li:dataPlatform:dbt", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "first_name", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - 
"nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "last_name", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } + "systemMetadata": { + "lastObserved": 1655162322423, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "seed", + "materialization": "seed", + "dbt_file_path": "data/raw_customers.csv", + "catalog_type": "table", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", + "manifest_version": "1.0.4", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.0.4" + }, + "externalUrl": null, + "name": "raw_customers", + "qualifiedName": null, + "description": "", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "seed.jaffle_shop.raw_customers", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } - ] + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "first_name", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "last_name", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322423, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_orders,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": 
"{\"typeNames\": [\"seed\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1655162322427, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322423, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_orders,PROD)", + "entityKeyAspect": null, + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"seed\"]}", + "contentType": "application/json" }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_orders,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "node_type": "seed", - "materialization": "seed", - "dbt_file_path": "data/raw_orders.csv", - "catalog_type": "table", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", - "manifest_version": "1.0.4", - "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.0.4" - }, - "externalUrl": null, - "name": "raw_orders", - "qualifiedName": null, - "description": "", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "seed.jaffle_shop.raw_orders", - "platform": "urn:li:dataPlatform:dbt", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": 
null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "user_id", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "order_date", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "status", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } + "systemMetadata": { + "lastObserved": 1655162322427, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": 
null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_orders,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "seed", + "materialization": "seed", + "dbt_file_path": "data/raw_orders.csv", + "catalog_type": "table", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", + "manifest_version": "1.0.4", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.0.4" + }, + "externalUrl": null, + "name": "raw_orders", + "qualifiedName": null, + "description": "", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "seed.jaffle_shop.raw_orders", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } - ] + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "user_id", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "order_date", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "status", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322427, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_payments,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"seed\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1655162322431, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322427, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, 
+ "properties": null + } + }, + { + "auditHeader": null, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_payments,PROD)", + "entityKeyAspect": null, + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"seed\"]}", + "contentType": "application/json" }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_payments,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "node_type": "seed", - "materialization": "seed", - "dbt_file_path": "data/raw_payments.csv", - "catalog_type": "table", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", - "manifest_version": "1.0.4", - "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.0.4" - }, - "externalUrl": null, - "name": "raw_payments", - "qualifiedName": null, - "description": "", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "seed.jaffle_shop.raw_payments", - "platform": "urn:li:dataPlatform:dbt", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "id", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - 
"nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "order_id", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "payment_method", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "amount", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } + "systemMetadata": { + "lastObserved": 1655162322431, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_payments,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "seed", + "materialization": 
"seed", + "dbt_file_path": "data/raw_payments.csv", + "catalog_type": "table", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", + "manifest_version": "1.0.4", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.0.4" + }, + "externalUrl": null, + "name": "raw_payments", + "qualifiedName": null, + "description": "", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "seed.jaffle_shop.raw_payments", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "order_id", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "payment_method", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { 
+ "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "amount", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} } - ] + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322432, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"table\", \"view\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1655162322509, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322432, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)", + "entityKeyAspect": null, + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": [\"table\", \"view\"]}", + "contentType": "application/json" }, - { - 
"auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "node_type": "model", - "materialization": "table", - "dbt_file_path": "models/customers.sql", - "catalog_type": "table", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", - "manifest_version": "1.0.4", - "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.0.4" - }, - "externalUrl": null, - "name": "customers", - "qualifiedName": null, - "description": "This table has basic information about a customer, as well as some derived facts based on a customer's orders", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "model.jaffle_shop.customers", - "platform": "urn:li:dataPlatform:dbt", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [ - { - "fieldPath": "customer_id", - "jsonPath": null, - "nullable": false, - "description": "This is a unique identifier for a customer", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "first_name", - "jsonPath": null, - 
"nullable": false, - "description": "Customer's first name. PII.", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "last_name", - "jsonPath": null, - "nullable": false, - "description": "Customer's last name. PII.", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "STRING", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "first_order", - "jsonPath": null, - "nullable": false, - "description": "Date (UTC) of a customer's first order", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "most_recent_order", - "jsonPath": null, - "nullable": false, - "description": "Date (UTC) of a customer's most recent order", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.DateType": {} - } - }, - "nativeDataType": "DATE", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "number_of_orders", - "jsonPath": null, - "nullable": false, - "description": "Count of the number of orders a customer has placed", - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "INT64", - 
"recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - }, - { - "fieldPath": "customer_lifetime_value", - "jsonPath": null, - "nullable": false, - "description": null, - "created": null, - "lastModified": null, - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.NumberType": {} - } - }, - "nativeDataType": "FLOAT64", - "recursive": false, - "globalTags": null, - "glossaryTerms": null, - "isPartOfKey": false, - "isPartitioningKey": null, - "jsonProps": null - } - ], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers_source,PROD)", - "type": "TRANSFORMED" - }, - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_customers,PROD)", - "type": "TRANSFORMED" - }, - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.transformers_customers,PROD)", - "type": "TRANSFORMED" - }, - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", - "type": "TRANSFORMED" - }, - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - }, - { - 
"com.linkedin.pegasus2avro.dataset.ViewProperties": { - "materialized": true, - "viewLogic": "with customers as (\n\n select * from {{ ref('stg_customers') }}\n\n),\n\nephemeral_customers as (\n\n select * from {{ ref('transformers_customers') }}\n\n),\n\norders as (\n\n select * from {{ ref('stg_orders') }}\n\n),\n\npayments as (\n\n select * from {{ ref('stg_payments') }}\n\n),\n\nsource_customers as (\n\n select * from {{ source('jaffle_shop', 'customers_source') }}\n\n),\n\ncustomer_orders as (\n\n select\n customer_id,\n\n min(order_date) as first_order,\n max(order_date) as most_recent_order,\n count(order_id) as number_of_orders\n from orders\n\n group by 1\n\n),\n\ncustomer_payments as (\n\n select\n orders.customer_id,\n sum(amount) as total_amount\n\n from payments\n\n left join orders using (order_id)\n\n group by 1\n\n),\n\nfinal as (\n\n select\n customers.customer_id,\n customers.first_name,\n customers.last_name,\n customer_orders.first_order,\n customer_orders.most_recent_order,\n customer_orders.number_of_orders,\n customer_payments.total_amount as customer_lifetime_value\n\n from customers\n\n left join customer_orders using (customer_id)\n\n left join customer_payments using (customer_id)\n\n)\n\nselect * from final", - "viewLanguage": "SQL" - } + "systemMetadata": { + "lastObserved": 1655162322509, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "model", + "materialization": "table", + "dbt_file_path": "models/customers.sql", + "catalog_type": "table", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", + "manifest_version": 
"1.0.4", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.0.4" + }, + "externalUrl": null, + "name": "customers", + "qualifiedName": null, + "description": "This table has basic information about a customer, as well as some derived facts based on a customer's orders", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "model.jaffle_shop.customers", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "customer_id", + "jsonPath": null, + "nullable": false, + "description": "This is a unique identifier for a customer", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "first_name", + "jsonPath": null, + "nullable": false, + "description": "Customer's first name. PII.", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "last_name", + "jsonPath": null, + "nullable": false, + "description": "Customer's last name. 
PII.", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} } - ] + }, + "nativeDataType": "STRING", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "first_order", + "jsonPath": null, + "nullable": false, + "description": "Date (UTC) of a customer's first order", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "most_recent_order", + "jsonPath": null, + "nullable": false, + "description": "Date (UTC) of a customer's most recent order", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.DateType": {} + } + }, + "nativeDataType": "DATE", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "number_of_orders", + "jsonPath": null, + "nullable": false, + "description": "Count of the number of orders a customer has placed", + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + "isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + }, + { + "fieldPath": "customer_lifetime_value", + "jsonPath": null, + "nullable": false, + "description": null, + "created": null, + "lastModified": null, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "FLOAT64", + "recursive": false, + "globalTags": null, + "glossaryTerms": null, + 
"isPartOfKey": false, + "isPartitioningKey": null, + "jsonProps": null + } + ], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers_source,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_customers,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.transformers_customers,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", + "type": "TRANSFORMED" + }, + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null + } + }, + { + "com.linkedin.pegasus2avro.dataset.ViewProperties": { + "materialized": true, + "viewLogic": "with customers as (\n\n select * from {{ ref('stg_customers') }}\n\n),\n\nephemeral_customers as (\n\n select * from {{ ref('transformers_customers') }}\n\n),\n\norders as (\n\n select * from {{ ref('stg_orders') }}\n\n),\n\npayments as (\n\n select * from {{ ref('stg_payments') }}\n\n),\n\nsource_customers as (\n\n select * from {{ source('jaffle_shop', 'customers_source') }}\n\n),\n\ncustomer_orders as (\n\n 
select\n customer_id,\n\n min(order_date) as first_order,\n max(order_date) as most_recent_order,\n count(order_id) as number_of_orders\n from orders\n\n group by 1\n\n),\n\ncustomer_payments as (\n\n select\n orders.customer_id,\n sum(amount) as total_amount\n\n from payments\n\n left join orders using (order_id)\n\n group by 1\n\n),\n\nfinal as (\n\n select\n customers.customer_id,\n customers.first_name,\n customers.last_name,\n customer_orders.first_order,\n customer_orders.most_recent_order,\n customer_orders.number_of_orders,\n customer_payments.total_amount as customer_lifetime_value\n\n from customers\n\n left join customer_orders using (customer_id)\n\n left join customer_payments using (customer_id)\n\n)\n\nselect * from final", + "viewLanguage": "SQL" } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322510, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers_source,PROD)", - "entityKeyAspect": null, - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "value": "{\"typeNames\": [\"source\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1655162322523, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322510, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers_source,PROD)", + "entityKeyAspect": null, + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "value": "{\"typeNames\": 
[\"source\"]}", + "contentType": "application/json" }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers_source,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "node_type": "source", - "dbt_file_path": "models/schema.yml", - "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", - "manifest_version": "1.0.4", - "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", - "catalog_version": "1.0.4" - }, - "externalUrl": null, - "name": "customers_source", - "qualifiedName": null, - "description": "", - "uri": null, - "tags": [] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "source.jaffle_shop.jaffle_shop.customers_source", - "platform": "urn:li:dataPlatform:dbt", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "deleted": null, - "dataset": null, - "cluster": null, - "hash": "", - "platformSchema": { - "com.linkedin.pegasus2avro.schema.MySqlDDL": { - "tableSchema": "" - } - }, - "fields": [], - "primaryKeys": null, - "foreignKeysSpecs": null, - "foreignKeys": null - } - }, - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers_source,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - } - ] + "systemMetadata": { + "lastObserved": 1655162322523, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + 
"registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers_source,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "node_type": "source", + "dbt_file_path": "models/schema.yml", + "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v4.json", + "manifest_version": "1.0.4", + "catalog_schema": "https://schemas.getdbt.com/dbt/catalog/v1.json", + "catalog_version": "1.0.4" + }, + "externalUrl": null, + "name": "customers_source", + "qualifiedName": null, + "description": "", + "uri": null, + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "source.jaffle_shop.jaffle_shop.customers_source", + "platform": "urn:li:dataPlatform:dbt", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "deleted": null, + "dataset": null, + "cluster": null, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [], + "primaryKeys": null, + "foreignKeysSpecs": null, + "foreignKeys": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322524, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers_source,PROD)", + 
"type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null + } + } + ] + } }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.orders,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.orders,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - } - ] + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322524, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.orders,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.orders,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322527, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_customers,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - 
"actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_customers,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - } - ] + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322527, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_customers,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322528, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_payments,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - } - ] + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322528, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": 
null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_payments,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_payments,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322529, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - } - ] + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322529, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.stg_orders,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.stg_orders,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322530, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_customers,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - } - ] + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322530, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_customers,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322531, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - 
"properties": null - } + } + ] + } }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_orders,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_orders,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - } - ] + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322531, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_orders,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_orders,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322532, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_payments,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": 
"urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_payments,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - } - ] + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322532, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.raw_payments,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.raw_payments,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322533, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } }, - { - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { - "upstreams": [ - { - "auditStamp": { - "time": 0, - "actor": "urn:li:corpuser:unknown", - "impersonator": null - }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)", - "type": "TRANSFORMED" - } - ], - "fineGrainedLineages": null - } - } - ] + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322533, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + 
"properties": null + } + }, + { + "auditHeader": null, + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,cypress_project.jaffle_shop.customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 0, + "actor": "urn:li:corpuser:unknown", + "impersonator": null + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)", + "type": "TRANSFORMED" + } + ], + "fineGrainedLineages": null } - }, - "proposedDelta": null, - "systemMetadata": { - "lastObserved": 1655162322536, - "runId": "dbt-2022_06_13-16_18_42", - "registryName": null, - "registryVersion": null, - "properties": null - } + } + ] + } + }, + "proposedDelta": null, + "systemMetadata": { + "lastObserved": 1655162322536, + "runId": "dbt-2022_06_13-16_18_42", + "registryName": null, + "registryVersion": null, + "properties": null } + } ] diff --git a/smoke-test/tests/cypress/data.json b/smoke-test/tests/cypress/data.json index 391eba1fe9342..5253b7a33b085 100644 --- a/smoke-test/tests/cypress/data.json +++ b/smoke-test/tests/cypress/data.json @@ -202,15 +202,15 @@ ], "fineGrainedLineages": [ { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD),field_bar)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD),shipment_info)" - ], - "confidenceScore": 1.0 + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:kafka,SampleCypressKafkaDataset,PROD),field_bar)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleCypressHdfsDataset,PROD),shipment_info)" + ], + "confidenceScore": 1.0 } ] } @@ -400,7 
+400,10 @@ }, { "com.linkedin.pegasus2avro.common.GlobalTags": { - "tags": [{ "tag": "urn:li:tag:Cypress" }, { "tag": "urn:li:tag:Cypress2" }] + "tags": [ + { "tag": "urn:li:tag:Cypress" }, + { "tag": "urn:li:tag:Cypress2" } + ] } } ] @@ -2135,4 +2138,4 @@ }, "systemMetadata": null } -] \ No newline at end of file +] diff --git a/smoke-test/tests/cypress/package.json b/smoke-test/tests/cypress/package.json index ebc1c6b3d7a8b..490284ab4d9d0 100644 --- a/smoke-test/tests/cypress/package.json +++ b/smoke-test/tests/cypress/package.json @@ -1,11 +1,25 @@ { + "scripts": { + "format:fix": "prettier --write .", + "format": "prettier --check .", + "lint:fix": "yarn run format:fix && CI=false yarn run lint --fix", + "lint": "yarn run format && CI=false eslint ." + }, "name": "smoke-test", "version": "1.0.0", "main": "index.js", "license": "MIT", - "devDependencies": { + "dependencies": { "cypress": "12.5.1", "cypress-timestamps": "^1.2.0", "dayjs": "^1.11.7" + }, + "devDependencies": { + "eslint": "^7.32.0 || ^8.2.0", + "eslint-config-airbnb-base": "^15.0.0", + "eslint-config-prettier": "^9.1.0", + "eslint-plugin-cypress": "^2.15.1", + "eslint-plugin-import": "^2.25.2", + "prettier": "^3.2.5" } } diff --git a/smoke-test/tests/cypress/yarn.lock b/smoke-test/tests/cypress/yarn.lock index c5aff25ea1106..2433e9f8fae08 100644 --- a/smoke-test/tests/cypress/yarn.lock +++ b/smoke-test/tests/cypress/yarn.lock @@ -2,6 +2,11 @@ # yarn lockfile v1 +"@aashutoshrathi/word-wrap@^1.2.3": + version "1.2.6" + resolved "https://registry.yarnpkg.com/@aashutoshrathi/word-wrap/-/word-wrap-1.2.6.tgz#bd9154aec9983f77b3a034ecaa015c2e4201f6cf" + integrity sha512-1Yjs2SvM8TflER/OD3cOjhWWOZb58A2t7wpE2S9XfBYTiIl+XFhQG2bjy4Pu1I+EAlCNUzRDYDdFwFYUKvXcIA== + "@cypress/request@^2.88.10": version "2.88.10" resolved "https://registry.npmjs.org/@cypress/request/-/request-2.88.10.tgz" @@ -34,6 +39,83 @@ debug "^3.1.0" lodash.once "^4.1.1" +"@eslint-community/eslint-utils@^4.2.0": + version "4.4.0" + resolved 
"https://registry.yarnpkg.com/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz#a23514e8fb9af1269d5f7788aa556798d61c6b59" + integrity sha512-1/sA4dwrzBAyeUoQ6oxahHKmrZvsnLCg4RfxW3ZFGGmQkSNQPFNLV9CUEFQP1x9EYXHTo5p6xdhZM1Ne9p/AfA== + dependencies: + eslint-visitor-keys "^3.3.0" + +"@eslint-community/regexpp@^4.6.1": + version "4.10.0" + resolved "https://registry.yarnpkg.com/@eslint-community/regexpp/-/regexpp-4.10.0.tgz#548f6de556857c8bb73bbee70c35dc82a2e74d63" + integrity sha512-Cu96Sd2By9mCNTx2iyKOmq10v22jUVQv0lQnlGNy16oE9589yE+QADPbrMGCkA51cKZSg3Pu/aTJVTGfL/qjUA== + +"@eslint/eslintrc@^2.1.4": + version "2.1.4" + resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-2.1.4.tgz#388a269f0f25c1b6adc317b5a2c55714894c70ad" + integrity sha512-269Z39MS6wVJtsoUl10L60WdkhJVdPG24Q4eZTH3nnF6lpvSShEK3wQjDX9JRWAUPvPh7COouPpU9IrqaZFvtQ== + dependencies: + ajv "^6.12.4" + debug "^4.3.2" + espree "^9.6.0" + globals "^13.19.0" + ignore "^5.2.0" + import-fresh "^3.2.1" + js-yaml "^4.1.0" + minimatch "^3.1.2" + strip-json-comments "^3.1.1" + +"@eslint/js@8.57.0": + version "8.57.0" + resolved "https://registry.yarnpkg.com/@eslint/js/-/js-8.57.0.tgz#a5417ae8427873f1dd08b70b3574b453e67b5f7f" + integrity sha512-Ys+3g2TaW7gADOJzPt83SJtCDhMjndcDMFVQ/Tj9iA1BfJzFKD9mAUXT3OenpuPHbI6P/myECxRJrofUsDx/5g== + +"@humanwhocodes/config-array@^0.11.14": + version "0.11.14" + resolved "https://registry.yarnpkg.com/@humanwhocodes/config-array/-/config-array-0.11.14.tgz#d78e481a039f7566ecc9660b4ea7fe6b1fec442b" + integrity sha512-3T8LkOmg45BV5FICb15QQMsyUSWrQ8AygVfC7ZG32zOalnqrilm018ZVCw0eapXux8FtA33q8PSRSstjee3jSg== + dependencies: + "@humanwhocodes/object-schema" "^2.0.2" + debug "^4.3.1" + minimatch "^3.0.5" + +"@humanwhocodes/module-importer@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz#af5b2691a22b44be847b0ca81641c5fb6ad0172c" + integrity 
sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA== + +"@humanwhocodes/object-schema@^2.0.2": + version "2.0.2" + resolved "https://registry.yarnpkg.com/@humanwhocodes/object-schema/-/object-schema-2.0.2.tgz#d9fae00a2d5cb40f92cfe64b47ad749fbc38f917" + integrity sha512-6EwiSjwWYP7pTckG6I5eyFANjPhmPjUX9JRLUSfNPC7FX7zK9gyZAfUEaECL6ALTpGX5AjnBq3C9XmVWPitNpw== + +"@nodelib/fs.scandir@2.1.5": + version "2.1.5" + resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" + integrity sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g== + dependencies: + "@nodelib/fs.stat" "2.0.5" + run-parallel "^1.1.9" + +"@nodelib/fs.stat@2.0.5": + version "2.0.5" + resolved "https://registry.yarnpkg.com/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz#5bd262af94e9d25bd1e71b05deed44876a222e8b" + integrity sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A== + +"@nodelib/fs.walk@^1.2.8": + version "1.2.8" + resolved "https://registry.yarnpkg.com/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz#e95737e8bb6746ddedf69c556953494f196fe69a" + integrity sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg== + dependencies: + "@nodelib/fs.scandir" "2.1.5" + fastq "^1.6.0" + +"@types/json5@^0.0.29": + version "0.0.29" + resolved "https://registry.yarnpkg.com/@types/json5/-/json5-0.0.29.tgz#ee28707ae94e11d2b827bcbe5270bcea7f3e71ee" + integrity sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ== + "@types/node@*": version "16.11.11" resolved "https://registry.npmjs.org/@types/node/-/node-16.11.11.tgz" @@ -61,6 +143,21 @@ dependencies: "@types/node" "*" +"@ungap/structured-clone@^1.2.0": + version "1.2.0" + resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406" + 
integrity sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ== + +acorn-jsx@^5.3.2: + version "5.3.2" + resolved "https://registry.yarnpkg.com/acorn-jsx/-/acorn-jsx-5.3.2.tgz#7ed5bb55908b3b2f1bc55c6af1653bada7f07937" + integrity sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ== + +acorn@^8.9.0: + version "8.11.3" + resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.11.3.tgz#71e0b14e13a4ec160724b38fb7b0f233b1b81d7a" + integrity sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg== + aggregate-error@^3.0.0: version "3.1.0" resolved "https://registry.npmjs.org/aggregate-error/-/aggregate-error-3.1.0.tgz" @@ -69,6 +166,16 @@ aggregate-error@^3.0.0: clean-stack "^2.0.0" indent-string "^4.0.0" +ajv@^6.12.4: + version "6.12.6" + resolved "https://registry.yarnpkg.com/ajv/-/ajv-6.12.6.tgz#baf5a62e802b07d977034586f8c3baf5adf26df4" + integrity sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g== + dependencies: + fast-deep-equal "^3.1.1" + fast-json-stable-stringify "^2.0.0" + json-schema-traverse "^0.4.1" + uri-js "^4.2.2" + ansi-colors@^4.1.1: version "4.1.1" resolved "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz" @@ -98,6 +205,86 @@ arch@^2.2.0: resolved "https://registry.npmjs.org/arch/-/arch-2.2.0.tgz" integrity sha512-Of/R0wqp83cgHozfIYLbBMnej79U/SVGOOyuB3VVFv1NRM/PSFMK12x9KVtiYzJqmnU5WR2qp0Z5rHb7sWGnFQ== +argparse@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38" + integrity sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q== + +array-buffer-byte-length@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz#1e5583ec16763540a27ae52eed99ff899223568f" + integrity 
sha512-ahC5W1xgou+KTXix4sAO8Ki12Q+jf4i0+tmk3sC+zgcynshkHxzpXdImBehiUYKKKDwvfFiJl1tZt6ewscS1Mg== + dependencies: + call-bind "^1.0.5" + is-array-buffer "^3.0.4" + +array-includes@^3.1.7: + version "3.1.7" + resolved "https://registry.yarnpkg.com/array-includes/-/array-includes-3.1.7.tgz#8cd2e01b26f7a3086cbc87271593fe921c62abda" + integrity sha512-dlcsNBIiWhPkHdOEEKnehA+RNUWDc4UqFtnIXU4uuYDPtA4LDkr7qip2p0VvFAEXNDr0yWZ9PJyIRiGjRLQzwQ== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + get-intrinsic "^1.2.1" + is-string "^1.0.7" + +array.prototype.filter@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/array.prototype.filter/-/array.prototype.filter-1.0.3.tgz#423771edeb417ff5914111fff4277ea0624c0d0e" + integrity sha512-VizNcj/RGJiUyQBgzwxzE5oHdeuXY5hSbbmKMlphj1cy1Vl7Pn2asCGbSrru6hSQjmCzqTBPVWAF/whmEOVHbw== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + es-array-method-boxes-properly "^1.0.0" + is-string "^1.0.7" + +array.prototype.findlastindex@^1.2.3: + version "1.2.4" + resolved "https://registry.yarnpkg.com/array.prototype.findlastindex/-/array.prototype.findlastindex-1.2.4.tgz#d1c50f0b3a9da191981ff8942a0aedd82794404f" + integrity sha512-hzvSHUshSpCflDR1QMUBLHGHP1VIEBegT4pix9H/Z92Xw3ySoy6c2qh7lJWTJnRJ8JCZ9bJNCgTyYaJGcJu6xQ== + dependencies: + call-bind "^1.0.5" + define-properties "^1.2.1" + es-abstract "^1.22.3" + es-errors "^1.3.0" + es-shim-unscopables "^1.0.2" + +array.prototype.flat@^1.3.2: + version "1.3.2" + resolved "https://registry.yarnpkg.com/array.prototype.flat/-/array.prototype.flat-1.3.2.tgz#1476217df8cff17d72ee8f3ba06738db5b387d18" + integrity sha512-djYB+Zx2vLewY8RWlNCUdHjDXs2XOgm602S9E7P/UpHgfeHL00cRiIF+IN/G/aUJ7kGPb6yO/ErDI5V2s8iycA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + es-shim-unscopables "^1.0.0" + +array.prototype.flatmap@^1.3.2: + version "1.3.2" + resolved 
"https://registry.yarnpkg.com/array.prototype.flatmap/-/array.prototype.flatmap-1.3.2.tgz#c9a7c6831db8e719d6ce639190146c24bbd3e527" + integrity sha512-Ewyx0c9PmpcsByhSW4r+9zDU7sGjFc86qf/kKtuSCRdhfbk0SNLLkaT5qvcHnRGgc5NP/ly/y+qkXkqONX54CQ== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + es-shim-unscopables "^1.0.0" + +arraybuffer.prototype.slice@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/arraybuffer.prototype.slice/-/arraybuffer.prototype.slice-1.0.3.tgz#097972f4255e41bc3425e37dc3f6421cf9aefde6" + integrity sha512-bMxMKAjg13EBSVscxTaYA4mRc5t1UAXa2kXiGTNfZ079HIWXEkKmkgFrh/nJqamaLSrXO5H4WFFkPEaLJWbs3A== + dependencies: + array-buffer-byte-length "^1.0.1" + call-bind "^1.0.5" + define-properties "^1.2.1" + es-abstract "^1.22.3" + es-errors "^1.2.1" + get-intrinsic "^1.2.3" + is-array-buffer "^3.0.4" + is-shared-array-buffer "^1.0.2" + asn1@~0.2.3: version "0.2.6" resolved "https://registry.npmjs.org/asn1/-/asn1-0.2.6.tgz" @@ -130,6 +317,13 @@ at-least-node@^1.0.0: resolved "https://registry.npmjs.org/at-least-node/-/at-least-node-1.0.0.tgz" integrity sha512-+q/t7Ekv1EDY2l6Gda6LLiX14rU9TV20Wa3ofeQmwPFZbOMo9DXrLbOjFaaclkXKWidIaopwAObQDqwWtGUjqg== +available-typed-arrays@^1.0.6, available-typed-arrays@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz#a5cc375d6a03c2efc87a553f3e0b1522def14846" + integrity sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ== + dependencies: + possible-typed-array-names "^1.0.0" + aws-sign2@~0.7.0: version "0.7.0" resolved "https://registry.npmjs.org/aws-sign2/-/aws-sign2-0.7.0.tgz" @@ -193,12 +387,28 @@ cachedir@^2.3.0: resolved "https://registry.npmjs.org/cachedir/-/cachedir-2.3.0.tgz" integrity sha512-A+Fezp4zxnit6FanDmv9EqXNAi3vt9DWp51/71UEhXukb7QUuvtv9344h91dyAxuTLoSYJFU299qzR3tzwPAhw== +call-bind@^1.0.2, call-bind@^1.0.5, call-bind@^1.0.6, 
call-bind@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/call-bind/-/call-bind-1.0.7.tgz#06016599c40c56498c18769d2730be242b6fa3b9" + integrity sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w== + dependencies: + es-define-property "^1.0.0" + es-errors "^1.3.0" + function-bind "^1.1.2" + get-intrinsic "^1.2.4" + set-function-length "^1.2.1" + +callsites@^3.0.0: + version "3.1.0" + resolved "https://registry.yarnpkg.com/callsites/-/callsites-3.1.0.tgz#b3630abd8943432f54b3f0519238e33cd7df2f73" + integrity sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ== + caseless@~0.12.0: version "0.12.0" resolved "https://registry.npmjs.org/caseless/-/caseless-0.12.0.tgz" integrity sha1-G2gcIf+EAzyCZUMJBolCDRhxUdw= -chalk@^4.1.0: +chalk@^4.0.0, chalk@^4.1.0: version "4.1.2" resolved "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz" integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA== @@ -289,12 +499,17 @@ concat-map@0.0.1: resolved "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz" integrity sha1-2Klr13/Wjfd5OnMDajug1UBdR3s= +confusing-browser-globals@^1.0.10: + version "1.0.11" + resolved "https://registry.yarnpkg.com/confusing-browser-globals/-/confusing-browser-globals-1.0.11.tgz#ae40e9b57cdd3915408a2805ebd3a5585608dc81" + integrity sha512-JsPKdmh8ZkmnHxDk55FZ1TqVLvEQTvoByJZRN9jzI0UjxK/QgAmsphz7PGtqgPieQZ/CQcHWXCR7ATDNhGe+YA== + core-util-is@1.0.2: version "1.0.2" resolved "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz" integrity sha1-tf1UIgqivFq1eqtxQMlAdUUDwac= -cross-spawn@^7.0.0: +cross-spawn@^7.0.0, cross-spawn@^7.0.2: version "7.0.3" resolved "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz" integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== @@ -375,7 +590,7 @@ dayjs@^1.11.7: resolved 
"https://registry.yarnpkg.com/dayjs/-/dayjs-1.11.7.tgz#4b296922642f70999544d1144a2c25730fce63e2" integrity sha512-+Yw9U6YO5TQohxLcIkrXBeY73WP3ejHWVvx8XCk3gxvQDCTEmS48ZrSZCKciI7Bhl/uCMyxYtE9UqRILmFphkQ== -debug@^3.1.0: +debug@^3.1.0, debug@^3.2.7: version "3.2.7" resolved "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz" integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ== @@ -389,11 +604,55 @@ debug@^4.1.1, debug@^4.3.2: dependencies: ms "2.1.2" +debug@^4.3.1: + version "4.3.4" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865" + integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ== + dependencies: + ms "2.1.2" + +deep-is@^0.1.3: + version "0.1.4" + resolved "https://registry.yarnpkg.com/deep-is/-/deep-is-0.1.4.tgz#a6f2dce612fadd2ef1f519b73551f17e85199831" + integrity sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ== + +define-data-property@^1.0.1, define-data-property@^1.1.2, define-data-property@^1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/define-data-property/-/define-data-property-1.1.4.tgz#894dc141bb7d3060ae4366f6a0107e68fbe48c5e" + integrity sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A== + dependencies: + es-define-property "^1.0.0" + es-errors "^1.3.0" + gopd "^1.0.1" + +define-properties@^1.1.3, define-properties@^1.2.0, define-properties@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/define-properties/-/define-properties-1.2.1.tgz#10781cc616eb951a80a034bafcaa7377f6af2b6c" + integrity sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg== + dependencies: + define-data-property "^1.0.1" + has-property-descriptors "^1.0.0" + object-keys "^1.1.1" + delayed-stream@~1.0.0: version "1.0.0" resolved 
"https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz" integrity sha1-3zrhmayt+31ECqrgsp4icrJOxhk= +doctrine@^2.1.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-2.1.0.tgz#5cd01fc101621b42c4cd7f5d1a66243716d3f39d" + integrity sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw== + dependencies: + esutils "^2.0.2" + +doctrine@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/doctrine/-/doctrine-3.0.0.tgz#addebead72a6574db783639dc87a121773973961" + integrity sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w== + dependencies: + esutils "^2.0.2" + ecc-jsbn@~0.1.1: version "0.1.2" resolved "https://registry.npmjs.org/ecc-jsbn/-/ecc-jsbn-0.1.2.tgz" @@ -421,11 +680,256 @@ enquirer@^2.3.6: dependencies: ansi-colors "^4.1.1" +es-abstract@^1.22.1, es-abstract@^1.22.3: + version "1.22.4" + resolved "https://registry.yarnpkg.com/es-abstract/-/es-abstract-1.22.4.tgz#26eb2e7538c3271141f5754d31aabfdb215f27bf" + integrity sha512-vZYJlk2u6qHYxBOTjAeg7qUxHdNfih64Uu2J8QqWgXZ2cri0ZpJAkzDUK/q593+mvKwlxyaxr6F1Q+3LKoQRgg== + dependencies: + array-buffer-byte-length "^1.0.1" + arraybuffer.prototype.slice "^1.0.3" + available-typed-arrays "^1.0.6" + call-bind "^1.0.7" + es-define-property "^1.0.0" + es-errors "^1.3.0" + es-set-tostringtag "^2.0.2" + es-to-primitive "^1.2.1" + function.prototype.name "^1.1.6" + get-intrinsic "^1.2.4" + get-symbol-description "^1.0.2" + globalthis "^1.0.3" + gopd "^1.0.1" + has-property-descriptors "^1.0.2" + has-proto "^1.0.1" + has-symbols "^1.0.3" + hasown "^2.0.1" + internal-slot "^1.0.7" + is-array-buffer "^3.0.4" + is-callable "^1.2.7" + is-negative-zero "^2.0.2" + is-regex "^1.1.4" + is-shared-array-buffer "^1.0.2" + is-string "^1.0.7" + is-typed-array "^1.1.13" + is-weakref "^1.0.2" + object-inspect "^1.13.1" + object-keys "^1.1.1" + object.assign "^4.1.5" + regexp.prototype.flags "^1.5.2" + 
safe-array-concat "^1.1.0" + safe-regex-test "^1.0.3" + string.prototype.trim "^1.2.8" + string.prototype.trimend "^1.0.7" + string.prototype.trimstart "^1.0.7" + typed-array-buffer "^1.0.1" + typed-array-byte-length "^1.0.0" + typed-array-byte-offset "^1.0.0" + typed-array-length "^1.0.4" + unbox-primitive "^1.0.2" + which-typed-array "^1.1.14" + +es-array-method-boxes-properly@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/es-array-method-boxes-properly/-/es-array-method-boxes-properly-1.0.0.tgz#873f3e84418de4ee19c5be752990b2e44718d09e" + integrity sha512-wd6JXUmyHmt8T5a2xreUwKcGPq6f1f+WwIJkijUqiGcJz1qqnZgP6XIK+QyIWU5lT7imeNxUll48bziG+TSYcA== + +es-define-property@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/es-define-property/-/es-define-property-1.0.0.tgz#c7faefbdff8b2696cf5f46921edfb77cc4ba3845" + integrity sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ== + dependencies: + get-intrinsic "^1.2.4" + +es-errors@^1.0.0, es-errors@^1.2.1, es-errors@^1.3.0: + version "1.3.0" + resolved "https://registry.yarnpkg.com/es-errors/-/es-errors-1.3.0.tgz#05f75a25dab98e4fb1dcd5e1472c0546d5057c8f" + integrity sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw== + +es-set-tostringtag@^2.0.2: + version "2.0.3" + resolved "https://registry.yarnpkg.com/es-set-tostringtag/-/es-set-tostringtag-2.0.3.tgz#8bb60f0a440c2e4281962428438d58545af39777" + integrity sha512-3T8uNMC3OQTHkFUsFq8r/BwAXLHvU/9O9mE0fBc/MY5iq/8H7ncvO947LmYA6ldWw9Uh8Yhf25zu6n7nML5QWQ== + dependencies: + get-intrinsic "^1.2.4" + has-tostringtag "^1.0.2" + hasown "^2.0.1" + +es-shim-unscopables@^1.0.0, es-shim-unscopables@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/es-shim-unscopables/-/es-shim-unscopables-1.0.2.tgz#1f6942e71ecc7835ed1c8a83006d8771a63a3763" + integrity sha512-J3yBRXCzDu4ULnQwxyToo/OjdMx6akgVC7K6few0a7F/0wLtmKKN7I73AH5T2836UuXRqN7Qg+IIUw/+YJksRw== + 
dependencies: + hasown "^2.0.0" + +es-to-primitive@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/es-to-primitive/-/es-to-primitive-1.2.1.tgz#e55cd4c9cdc188bcefb03b366c736323fc5c898a" + integrity sha512-QCOllgZJtaUo9miYBcLChTUaHNjJF3PYs1VidD7AwiEj1kYxKeQTctLAezAOH5ZKRH0g2IgPn6KwB4IT8iRpvA== + dependencies: + is-callable "^1.1.4" + is-date-object "^1.0.1" + is-symbol "^1.0.2" + escape-string-regexp@^1.0.5: version "1.0.5" resolved "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz" integrity sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ= +escape-string-regexp@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34" + integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA== + +eslint-config-airbnb-base@^15.0.0: + version "15.0.0" + resolved "https://registry.yarnpkg.com/eslint-config-airbnb-base/-/eslint-config-airbnb-base-15.0.0.tgz#6b09add90ac79c2f8d723a2580e07f3925afd236" + integrity sha512-xaX3z4ZZIcFLvh2oUNvcX5oEofXda7giYmuplVxoOg5A7EXJMrUyqRgR+mhDhPK8LZ4PttFOBvCYDbX3sUoUig== + dependencies: + confusing-browser-globals "^1.0.10" + object.assign "^4.1.2" + object.entries "^1.1.5" + semver "^6.3.0" + +eslint-config-prettier@^9.1.0: + version "9.1.0" + resolved "https://registry.yarnpkg.com/eslint-config-prettier/-/eslint-config-prettier-9.1.0.tgz#31af3d94578645966c082fcb71a5846d3c94867f" + integrity sha512-NSWl5BFQWEPi1j4TjVNItzYV7dZXZ+wP6I6ZhrBGpChQhZRUaElihE9uRRkcbRnNb76UMKDF3r+WTmNcGPKsqw== + +eslint-import-resolver-node@^0.3.9: + version "0.3.9" + resolved "https://registry.yarnpkg.com/eslint-import-resolver-node/-/eslint-import-resolver-node-0.3.9.tgz#d4eaac52b8a2e7c3cd1903eb00f7e053356118ac" + integrity sha512-WFj2isz22JahUv+B788TlO3N6zL3nNJGU8CcZbPZvVEkBPaJdCV4vy5wyghty5ROFbCRnm132v8BScu5/1BQ8g== + dependencies: + debug "^3.2.7" + is-core-module "^2.13.0" + resolve 
"^1.22.4" + +eslint-module-utils@^2.8.0: + version "2.8.0" + resolved "https://registry.yarnpkg.com/eslint-module-utils/-/eslint-module-utils-2.8.0.tgz#e439fee65fc33f6bba630ff621efc38ec0375c49" + integrity sha512-aWajIYfsqCKRDgUfjEXNN/JlrzauMuSEy5sbd7WXbtW3EH6A6MpwEh42c7qD+MqQo9QMJ6fWLAeIJynx0g6OAw== + dependencies: + debug "^3.2.7" + +eslint-plugin-cypress@^2.15.1: + version "2.15.1" + resolved "https://registry.yarnpkg.com/eslint-plugin-cypress/-/eslint-plugin-cypress-2.15.1.tgz#336afa7e8e27451afaf65aa359c9509e0a4f3a7b" + integrity sha512-eLHLWP5Q+I4j2AWepYq0PgFEei9/s5LvjuSqWrxurkg1YZ8ltxdvMNmdSf0drnsNo57CTgYY/NIHHLRSWejR7w== + dependencies: + globals "^13.20.0" + +eslint-plugin-import@^2.25.2: + version "2.29.1" + resolved "https://registry.yarnpkg.com/eslint-plugin-import/-/eslint-plugin-import-2.29.1.tgz#d45b37b5ef5901d639c15270d74d46d161150643" + integrity sha512-BbPC0cuExzhiMo4Ff1BTVwHpjjv28C5R+btTOGaCRC7UEz801up0JadwkeSk5Ued6TG34uaczuVuH6qyy5YUxw== + dependencies: + array-includes "^3.1.7" + array.prototype.findlastindex "^1.2.3" + array.prototype.flat "^1.3.2" + array.prototype.flatmap "^1.3.2" + debug "^3.2.7" + doctrine "^2.1.0" + eslint-import-resolver-node "^0.3.9" + eslint-module-utils "^2.8.0" + hasown "^2.0.0" + is-core-module "^2.13.1" + is-glob "^4.0.3" + minimatch "^3.1.2" + object.fromentries "^2.0.7" + object.groupby "^1.0.1" + object.values "^1.1.7" + semver "^6.3.1" + tsconfig-paths "^3.15.0" + +eslint-scope@^7.2.2: + version "7.2.2" + resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-7.2.2.tgz#deb4f92563390f32006894af62a22dba1c46423f" + integrity sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg== + dependencies: + esrecurse "^4.3.0" + estraverse "^5.2.0" + +eslint-visitor-keys@^3.3.0, eslint-visitor-keys@^3.4.1, eslint-visitor-keys@^3.4.3: + version "3.4.3" + resolved 
"https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz#0cd72fe8550e3c2eae156a96a4dddcd1c8ac5800" + integrity sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag== + +"eslint@^7.32.0 || ^8.2.0": + version "8.57.0" + resolved "https://registry.yarnpkg.com/eslint/-/eslint-8.57.0.tgz#c786a6fd0e0b68941aaf624596fb987089195668" + integrity sha512-dZ6+mexnaTIbSBZWgou51U6OmzIhYM2VcNdtiTtI7qPNZm35Akpr0f6vtw3w1Kmn5PYo+tZVfh13WrhpS6oLqQ== + dependencies: + "@eslint-community/eslint-utils" "^4.2.0" + "@eslint-community/regexpp" "^4.6.1" + "@eslint/eslintrc" "^2.1.4" + "@eslint/js" "8.57.0" + "@humanwhocodes/config-array" "^0.11.14" + "@humanwhocodes/module-importer" "^1.0.1" + "@nodelib/fs.walk" "^1.2.8" + "@ungap/structured-clone" "^1.2.0" + ajv "^6.12.4" + chalk "^4.0.0" + cross-spawn "^7.0.2" + debug "^4.3.2" + doctrine "^3.0.0" + escape-string-regexp "^4.0.0" + eslint-scope "^7.2.2" + eslint-visitor-keys "^3.4.3" + espree "^9.6.1" + esquery "^1.4.2" + esutils "^2.0.2" + fast-deep-equal "^3.1.3" + file-entry-cache "^6.0.1" + find-up "^5.0.0" + glob-parent "^6.0.2" + globals "^13.19.0" + graphemer "^1.4.0" + ignore "^5.2.0" + imurmurhash "^0.1.4" + is-glob "^4.0.0" + is-path-inside "^3.0.3" + js-yaml "^4.1.0" + json-stable-stringify-without-jsonify "^1.0.1" + levn "^0.4.1" + lodash.merge "^4.6.2" + minimatch "^3.1.2" + natural-compare "^1.4.0" + optionator "^0.9.3" + strip-ansi "^6.0.1" + text-table "^0.2.0" + +espree@^9.6.0, espree@^9.6.1: + version "9.6.1" + resolved "https://registry.yarnpkg.com/espree/-/espree-9.6.1.tgz#a2a17b8e434690a5432f2f8018ce71d331a48c6f" + integrity sha512-oruZaFkjorTpF32kDSI5/75ViwGeZginGGy2NoOSg3Q9bnwlnmDm4HLnkl0RE3n+njDXR037aY1+x58Z/zFdwQ== + dependencies: + acorn "^8.9.0" + acorn-jsx "^5.3.2" + eslint-visitor-keys "^3.4.1" + +esquery@^1.4.2: + version "1.5.0" + resolved "https://registry.yarnpkg.com/esquery/-/esquery-1.5.0.tgz#6ce17738de8577694edd7361c57182ac8cb0db0b" + 
integrity sha512-YQLXUplAwJgCydQ78IMJywZCceoqk1oH01OERdSAJc/7U2AylwjhSCLDEtqwg811idIS/9fIU5GjG73IgjKMVg== + dependencies: + estraverse "^5.1.0" + +esrecurse@^4.3.0: + version "4.3.0" + resolved "https://registry.yarnpkg.com/esrecurse/-/esrecurse-4.3.0.tgz#7ad7964d679abb28bee72cec63758b1c5d2c9921" + integrity sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag== + dependencies: + estraverse "^5.2.0" + +estraverse@^5.1.0, estraverse@^5.2.0: + version "5.3.0" + resolved "https://registry.yarnpkg.com/estraverse/-/estraverse-5.3.0.tgz#2eea5290702f26ab8fe5370370ff86c965d21123" + integrity sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA== + +esutils@^2.0.2: + version "2.0.3" + resolved "https://registry.yarnpkg.com/esutils/-/esutils-2.0.3.tgz#74d2eb4de0b8da1293711910d50775b9b710ef64" + integrity sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g== + eventemitter2@6.4.7: version "6.4.7" resolved "https://registry.yarnpkg.com/eventemitter2/-/eventemitter2-6.4.7.tgz#a7f6c4d7abf28a14c1ef3442f21cb306a054271d" @@ -479,6 +983,28 @@ extsprintf@^1.2.0: resolved "https://registry.npmjs.org/extsprintf/-/extsprintf-1.4.1.tgz" integrity sha512-Wrk35e8ydCKDj/ArClo1VrPVmN8zph5V4AtHwIuHhvMXsKf73UT3BOD+azBIW+3wOJ4FhEH7zyaJCFvChjYvMA== +fast-deep-equal@^3.1.1, fast-deep-equal@^3.1.3: + version "3.1.3" + resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525" + integrity sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q== + +fast-json-stable-stringify@^2.0.0: + version "2.1.0" + resolved "https://registry.yarnpkg.com/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz#874bf69c6f404c2b5d99c481341399fd55892633" + integrity sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw== + 
+fast-levenshtein@^2.0.6: + version "2.0.6" + resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz#3d8a5c66883a16a30ca8643e851f19baa7797917" + integrity sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw== + +fastq@^1.6.0: + version "1.17.1" + resolved "https://registry.yarnpkg.com/fastq/-/fastq-1.17.1.tgz#2a523f07a4e7b1e81a42b91b8bf2254107753b47" + integrity sha512-sRVD3lWVIXWg6By68ZN7vho9a1pQcN/WBFaAAsDDFzlJjvoGx0P8z7V1t72grFJfJhu3YPZBuu25f7Kaw2jN1w== + dependencies: + reusify "^1.0.4" + fd-slicer@~1.1.0: version "1.1.0" resolved "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz" @@ -493,6 +1019,42 @@ figures@^3.2.0: dependencies: escape-string-regexp "^1.0.5" +file-entry-cache@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/file-entry-cache/-/file-entry-cache-6.0.1.tgz#211b2dd9659cb0394b073e7323ac3c933d522027" + integrity sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg== + dependencies: + flat-cache "^3.0.4" + +find-up@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/find-up/-/find-up-5.0.0.tgz#4c92819ecb7083561e4f4a240a86be5198f536fc" + integrity sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng== + dependencies: + locate-path "^6.0.0" + path-exists "^4.0.0" + +flat-cache@^3.0.4: + version "3.2.0" + resolved "https://registry.yarnpkg.com/flat-cache/-/flat-cache-3.2.0.tgz#2c0c2d5040c99b1632771a9d105725c0115363ee" + integrity sha512-CYcENa+FtcUKLmhhqyctpclsq7QF38pKjZHsGNiSQF5r4FtoKDWabFDl3hzaEQMvT1LHEysw5twgLvpYYb4vbw== + dependencies: + flatted "^3.2.9" + keyv "^4.5.3" + rimraf "^3.0.2" + +flatted@^3.2.9: + version "3.3.1" + resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.3.1.tgz#21db470729a6734d4997002f439cb308987f567a" + integrity sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw== + 
+for-each@^0.3.3: + version "0.3.3" + resolved "https://registry.yarnpkg.com/for-each/-/for-each-0.3.3.tgz#69b447e88a0a5d32c3e7084f3f1710034b21376e" + integrity sha512-jqYfLp7mo9vIyQf8ykW2v7A+2N4QjeCeI5+Dz9XraiO1ign81wjiH7Fb9vSOWvQfNtmSa4H2RoQTrrXivdUZmw== + dependencies: + is-callable "^1.1.3" + forever-agent@~0.6.1: version "0.6.1" resolved "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz" @@ -527,6 +1089,37 @@ fs.realpath@^1.0.0: resolved "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz" integrity sha1-FQStJSMVjKpA20onh8sBQRmU6k8= +function-bind@^1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/function-bind/-/function-bind-1.1.2.tgz#2c02d864d97f3ea6c8830c464cbd11ab6eab7a1c" + integrity sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA== + +function.prototype.name@^1.1.6: + version "1.1.6" + resolved "https://registry.yarnpkg.com/function.prototype.name/-/function.prototype.name-1.1.6.tgz#cdf315b7d90ee77a4c6ee216c3c3362da07533fd" + integrity sha512-Z5kx79swU5P27WEayXM1tBi5Ze/lbIyiNgU3qyXUOf9b2rgXYyF9Dy9Cx+IQv/Lc8WCG6L82zwUPpSS9hGehIg== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + functions-have-names "^1.2.3" + +functions-have-names@^1.2.3: + version "1.2.3" + resolved "https://registry.yarnpkg.com/functions-have-names/-/functions-have-names-1.2.3.tgz#0404fe4ee2ba2f607f0e0ec3c80bae994133b834" + integrity sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ== + +get-intrinsic@^1.1.3, get-intrinsic@^1.2.1, get-intrinsic@^1.2.2, get-intrinsic@^1.2.3, get-intrinsic@^1.2.4: + version "1.2.4" + resolved "https://registry.yarnpkg.com/get-intrinsic/-/get-intrinsic-1.2.4.tgz#e385f5a4b5227d449c3eabbad05494ef0abbeadd" + integrity sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ== + dependencies: + es-errors "^1.3.0" + function-bind "^1.1.2" + 
has-proto "^1.0.1" + has-symbols "^1.0.3" + hasown "^2.0.0" + get-stream@^5.0.0, get-stream@^5.1.0: version "5.2.0" resolved "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz" @@ -534,6 +1127,15 @@ get-stream@^5.0.0, get-stream@^5.1.0: dependencies: pump "^3.0.0" +get-symbol-description@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/get-symbol-description/-/get-symbol-description-1.0.2.tgz#533744d5aa20aca4e079c8e5daf7fd44202821f5" + integrity sha512-g0QYk1dZBxGwk+Ngc+ltRH2IBp2f7zBkBMBJZCDerh6EhlhSR6+9irMCuT/09zD6qkarHUSn529sK/yL4S27mg== + dependencies: + call-bind "^1.0.5" + es-errors "^1.3.0" + get-intrinsic "^1.2.4" + getos@^3.2.1: version "3.2.1" resolved "https://registry.npmjs.org/getos/-/getos-3.2.1.tgz" @@ -548,6 +1150,13 @@ getpass@^0.1.1: dependencies: assert-plus "^1.0.0" +glob-parent@^6.0.2: + version "6.0.2" + resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-6.0.2.tgz#6d237d99083950c79290f24c7642a3de9a28f9e3" + integrity sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A== + dependencies: + is-glob "^4.0.3" + glob@^7.1.3: version "7.2.0" resolved "https://registry.npmjs.org/glob/-/glob-7.2.0.tgz" @@ -567,16 +1176,78 @@ global-dirs@^3.0.0: dependencies: ini "2.0.0" +globals@^13.19.0, globals@^13.20.0: + version "13.24.0" + resolved "https://registry.yarnpkg.com/globals/-/globals-13.24.0.tgz#8432a19d78ce0c1e833949c36adb345400bb1171" + integrity sha512-AhO5QUcj8llrbG09iWhPU2B204J1xnPeL8kQmVorSsy+Sjj1sk8gIyh6cUocGmH4L0UuhAJy+hJMRA4mgA4mFQ== + dependencies: + type-fest "^0.20.2" + +globalthis@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/globalthis/-/globalthis-1.0.3.tgz#5852882a52b80dc301b0660273e1ed082f0b6ccf" + integrity sha512-sFdI5LyBiNTHjRd7cGPWapiHWMOXKyuBNX/cWJ3NfzrZQVa8GI/8cofCl74AOVqq9W5kNmguTIzJ/1s2gyI9wA== + dependencies: + define-properties "^1.1.3" + +gopd@^1.0.1: + version "1.0.1" + resolved 
"https://registry.yarnpkg.com/gopd/-/gopd-1.0.1.tgz#29ff76de69dac7489b7c0918a5788e56477c332c" + integrity sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA== + dependencies: + get-intrinsic "^1.1.3" + graceful-fs@^4.1.6, graceful-fs@^4.2.0: version "4.2.8" resolved "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.8.tgz" integrity sha512-qkIilPUYcNhJpd33n0GBXTB1MMPp14TxEsEs0pTrsSVucApsYzW5V+Q8Qxhik6KU3evy+qkAAowTByymK0avdg== +graphemer@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/graphemer/-/graphemer-1.4.0.tgz#fb2f1d55e0e3a1849aeffc90c4fa0dd53a0e66c6" + integrity sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag== + +has-bigints@^1.0.1, has-bigints@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/has-bigints/-/has-bigints-1.0.2.tgz#0871bd3e3d51626f6ca0966668ba35d5602d6eaa" + integrity sha512-tSvCKtBr9lkF0Ex0aQiP9N+OpV4zi2r/Nee5VkRDbaqv35RLYMzbwQfFSZZH0kR+Rd6302UJZ2p/bJCEoR3VoQ== + has-flag@^4.0.0: version "4.0.0" resolved "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz" integrity sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ== +has-property-descriptors@^1.0.0, has-property-descriptors@^1.0.1, has-property-descriptors@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz#963ed7d071dc7bf5f084c5bfbe0d1b6222586854" + integrity sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg== + dependencies: + es-define-property "^1.0.0" + +has-proto@^1.0.1, has-proto@^1.0.3: + version "1.0.3" + resolved "https://registry.yarnpkg.com/has-proto/-/has-proto-1.0.3.tgz#b31ddfe9b0e6e9914536a6ab286426d0214f77fd" + integrity sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q== + +has-symbols@^1.0.2, has-symbols@^1.0.3: + version "1.0.3" + 
resolved "https://registry.yarnpkg.com/has-symbols/-/has-symbols-1.0.3.tgz#bb7b2c4349251dce87b125f7bdf874aa7c8b39f8" + integrity sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A== + +has-tostringtag@^1.0.0, has-tostringtag@^1.0.1, has-tostringtag@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/has-tostringtag/-/has-tostringtag-1.0.2.tgz#2cdc42d40bef2e5b4eeab7c01a73c54ce7ab5abc" + integrity sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw== + dependencies: + has-symbols "^1.0.3" + +hasown@^2.0.0, hasown@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/hasown/-/hasown-2.0.1.tgz#26f48f039de2c0f8d3356c223fb8d50253519faa" + integrity sha512-1/th4MHjnwncwXsIW6QMzlvYL9kG5e/CpVvLRZe4XPa8TOUNbCELqmvhDmnkNsAjwaG4+I8gJJL0JBvTTLO9qA== + dependencies: + function-bind "^1.1.2" + http-signature@~1.3.6: version "1.3.6" resolved "https://registry.npmjs.org/http-signature/-/http-signature-1.3.6.tgz" @@ -596,6 +1267,24 @@ ieee754@^1.1.13: resolved "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz" integrity sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA== +ignore@^5.2.0: + version "5.3.1" + resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.3.1.tgz#5073e554cd42c5b33b394375f538b8593e34d4ef" + integrity sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw== + +import-fresh@^3.2.1: + version "3.3.0" + resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-3.3.0.tgz#37162c25fcb9ebaa2e6e53d5b4d88ce17d9e0c2b" + integrity sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw== + dependencies: + parent-module "^1.0.0" + resolve-from "^4.0.0" + +imurmurhash@^0.1.4: + version "0.1.4" + resolved "https://registry.yarnpkg.com/imurmurhash/-/imurmurhash-0.1.4.tgz#9218b9b2b928a238b13dc4fb6b6d576f231453ea" + integrity 
sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA== + indent-string@^4.0.0: version "4.0.0" resolved "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz" @@ -619,6 +1308,43 @@ ini@2.0.0: resolved "https://registry.npmjs.org/ini/-/ini-2.0.0.tgz" integrity sha512-7PnF4oN3CvZF23ADhA5wRaYEQpJ8qygSkbtTXWBeXWXmEVRXK+1ITciHWwHhsjv1TmW0MgacIv6hEi5pX5NQdA== +internal-slot@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/internal-slot/-/internal-slot-1.0.7.tgz#c06dcca3ed874249881007b0a5523b172a190802" + integrity sha512-NGnrKwXzSms2qUUih/ILZ5JBqNTSa1+ZmP6flaIp6KmSElgE9qdndzS3cqjrDovwFdmwsGsLdeFgB6suw+1e9g== + dependencies: + es-errors "^1.3.0" + hasown "^2.0.0" + side-channel "^1.0.4" + +is-array-buffer@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/is-array-buffer/-/is-array-buffer-3.0.4.tgz#7a1f92b3d61edd2bc65d24f130530ea93d7fae98" + integrity sha512-wcjaerHw0ydZwfhiKbXJWLDY8A7yV7KhjQOpb83hGgGfId/aQa4TOvwyzn2PuswW2gPCYEL/nEAiSVpdOj1lXw== + dependencies: + call-bind "^1.0.2" + get-intrinsic "^1.2.1" + +is-bigint@^1.0.1: + version "1.0.4" + resolved "https://registry.yarnpkg.com/is-bigint/-/is-bigint-1.0.4.tgz#08147a1875bc2b32005d41ccd8291dffc6691df3" + integrity sha512-zB9CruMamjym81i2JZ3UMn54PKGsQzsJeo6xvN3HJJ4CAsQNB6iRutp2To77OfCNuoxspsIhzaPoO1zyCEhFOg== + dependencies: + has-bigints "^1.0.1" + +is-boolean-object@^1.1.0: + version "1.1.2" + resolved "https://registry.yarnpkg.com/is-boolean-object/-/is-boolean-object-1.1.2.tgz#5c6dc200246dd9321ae4b885a114bb1f75f63719" + integrity sha512-gDYaKHJmnj4aWxyj6YHyXVpdQawtVLHU5cb+eztPGczf6cjuTdwve5ZIEfgXqH4e57An1D1AKf8CZ3kYrQRqYA== + dependencies: + call-bind "^1.0.2" + has-tostringtag "^1.0.0" + +is-callable@^1.1.3, is-callable@^1.1.4, is-callable@^1.2.7: + version "1.2.7" + resolved "https://registry.yarnpkg.com/is-callable/-/is-callable-1.2.7.tgz#3bc2a85ea742d9e36205dcacdd72ca1fdc51b055" + integrity 
sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA== + is-ci@^3.0.0: version "3.0.1" resolved "https://registry.npmjs.org/is-ci/-/is-ci-3.0.1.tgz" @@ -626,11 +1352,37 @@ is-ci@^3.0.0: dependencies: ci-info "^3.2.0" +is-core-module@^2.13.0, is-core-module@^2.13.1: + version "2.13.1" + resolved "https://registry.yarnpkg.com/is-core-module/-/is-core-module-2.13.1.tgz#ad0d7532c6fea9da1ebdc82742d74525c6273384" + integrity sha512-hHrIjvZsftOsvKSn2TRYl63zvxsgE0K+0mYMoH6gD4omR5IWB2KynivBQczo3+wF1cCkjzvptnI9Q0sPU66ilw== + dependencies: + hasown "^2.0.0" + +is-date-object@^1.0.1: + version "1.0.5" + resolved "https://registry.yarnpkg.com/is-date-object/-/is-date-object-1.0.5.tgz#0841d5536e724c25597bf6ea62e1bd38298df31f" + integrity sha512-9YQaSxsAiSwcvS33MBk3wTCVnWK+HhF8VZR2jRxehM16QcVOdHqPn4VPHmRK4lSr38n9JriurInLcP90xsYNfQ== + dependencies: + has-tostringtag "^1.0.0" + +is-extglob@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/is-extglob/-/is-extglob-2.1.1.tgz#a88c02535791f02ed37c76a1b9ea9773c833f8c2" + integrity sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ== + is-fullwidth-code-point@^3.0.0: version "3.0.0" resolved "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz" integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg== +is-glob@^4.0.0, is-glob@^4.0.3: + version "4.0.3" + resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084" + integrity sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg== + dependencies: + is-extglob "^2.1.1" + is-installed-globally@~0.4.0: version "0.4.0" resolved "https://registry.npmjs.org/is-installed-globally/-/is-installed-globally-0.4.0.tgz" @@ -639,16 +1391,64 @@ is-installed-globally@~0.4.0: global-dirs "^3.0.0" is-path-inside "^3.0.2" 
-is-path-inside@^3.0.2: +is-negative-zero@^2.0.2: + version "2.0.3" + resolved "https://registry.yarnpkg.com/is-negative-zero/-/is-negative-zero-2.0.3.tgz#ced903a027aca6381b777a5743069d7376a49747" + integrity sha512-5KoIu2Ngpyek75jXodFvnafB6DJgr3u8uuK0LEZJjrU19DrMD3EVERaR8sjz8CCGgpZvxPl9SuE1GMVPFHx1mw== + +is-number-object@^1.0.4: + version "1.0.7" + resolved "https://registry.yarnpkg.com/is-number-object/-/is-number-object-1.0.7.tgz#59d50ada4c45251784e9904f5246c742f07a42fc" + integrity sha512-k1U0IRzLMo7ZlYIfzRu23Oh6MiIFasgpb9X76eqfFZAqwH44UI4KTBvBYIZ1dSL9ZzChTB9ShHfLkR4pdW5krQ== + dependencies: + has-tostringtag "^1.0.0" + +is-path-inside@^3.0.2, is-path-inside@^3.0.3: version "3.0.3" resolved "https://registry.npmjs.org/is-path-inside/-/is-path-inside-3.0.3.tgz" integrity sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ== +is-regex@^1.1.4: + version "1.1.4" + resolved "https://registry.yarnpkg.com/is-regex/-/is-regex-1.1.4.tgz#eef5663cd59fa4c0ae339505323df6854bb15958" + integrity sha512-kvRdxDsxZjhzUX07ZnLydzS1TU/TJlTUHHY4YLL87e37oUA49DfkLqgy+VjFocowy29cKvcSiu+kIv728jTTVg== + dependencies: + call-bind "^1.0.2" + has-tostringtag "^1.0.0" + +is-shared-array-buffer@^1.0.2: + version "1.0.3" + resolved "https://registry.yarnpkg.com/is-shared-array-buffer/-/is-shared-array-buffer-1.0.3.tgz#1237f1cba059cdb62431d378dcc37d9680181688" + integrity sha512-nA2hv5XIhLR3uVzDDfCIknerhx8XUKnstuOERPNNIinXG7v9u+ohXF67vxm4TPTEPU6lm61ZkwP3c9PCB97rhg== + dependencies: + call-bind "^1.0.7" + is-stream@^2.0.0: version "2.0.1" resolved "https://registry.npmjs.org/is-stream/-/is-stream-2.0.1.tgz" integrity sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg== +is-string@^1.0.5, is-string@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/is-string/-/is-string-1.0.7.tgz#0dd12bf2006f255bb58f695110eff7491eebc0fd" + integrity 
sha512-tE2UXzivje6ofPW7l23cjDOMa09gb7xlAqG6jG5ej6uPV32TlWP3NKPigtaGeHNu9fohccRYvIiZMfOOnOYUtg== + dependencies: + has-tostringtag "^1.0.0" + +is-symbol@^1.0.2, is-symbol@^1.0.3: + version "1.0.4" + resolved "https://registry.yarnpkg.com/is-symbol/-/is-symbol-1.0.4.tgz#a6dac93b635b063ca6872236de88910a57af139c" + integrity sha512-C/CPBqKWnvdcxqIARxyOh4v1UUEOCHpgDa0WYgpKDFMszcrPcffg5uhwSgPCLD2WWxmq6isisz87tzT01tuGhg== + dependencies: + has-symbols "^1.0.2" + +is-typed-array@^1.1.13: + version "1.1.13" + resolved "https://registry.yarnpkg.com/is-typed-array/-/is-typed-array-1.1.13.tgz#d6c5ca56df62334959322d7d7dd1cca50debe229" + integrity sha512-uZ25/bUAlUY5fR4OKT4rZQEBrzQWYV9ZJYGGsUmEJ6thodVJ1HX64ePQ6Z0qPWP+m+Uq6e9UugrE38jeYsDSMw== + dependencies: + which-typed-array "^1.1.14" + is-typedarray@~1.0.0: version "1.0.0" resolved "https://registry.npmjs.org/is-typedarray/-/is-typedarray-1.0.0.tgz" @@ -659,6 +1459,18 @@ is-unicode-supported@^0.1.0: resolved "https://registry.npmjs.org/is-unicode-supported/-/is-unicode-supported-0.1.0.tgz" integrity sha512-knxG2q4UC3u8stRGyAVJCOdxFmv5DZiRcdlIaAQXAbSfJya+OhopNotLQrstBhququ4ZpuKbDc/8S6mgXgPFPw== +is-weakref@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/is-weakref/-/is-weakref-1.0.2.tgz#9529f383a9338205e89765e0392efc2f100f06f2" + integrity sha512-qctsuLZmIQ0+vSSMfoVvyFe2+GSEvnmZ2ezTup1SBse9+twCCeial6EEi3Nc2KFcf6+qz2FBPnjXsk8xhKSaPQ== + dependencies: + call-bind "^1.0.2" + +isarray@^2.0.5: + version "2.0.5" + resolved "https://registry.yarnpkg.com/isarray/-/isarray-2.0.5.tgz#8af1e4c1221244cc62459faf38940d4e644a5723" + integrity sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw== + isexe@^2.0.0: version "2.0.0" resolved "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz" @@ -669,21 +1481,50 @@ isstream@~0.1.2: resolved "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz" integrity sha1-R+Y/evVa+m+S4VAOaQ64uFKcCZo= +js-yaml@^4.1.0: + version "4.1.0" + resolved 
"https://registry.yarnpkg.com/js-yaml/-/js-yaml-4.1.0.tgz#c1fb65f8f5017901cdd2c951864ba18458a10602" + integrity sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA== + dependencies: + argparse "^2.0.1" + jsbn@~0.1.0: version "0.1.1" resolved "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz" integrity sha1-peZUwuWi3rXyAdls77yoDA7y9RM= +json-buffer@3.0.1: + version "3.0.1" + resolved "https://registry.yarnpkg.com/json-buffer/-/json-buffer-3.0.1.tgz#9338802a30d3b6605fbe0613e094008ca8c05a13" + integrity sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ== + +json-schema-traverse@^0.4.1: + version "0.4.1" + resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660" + integrity sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg== + json-schema@0.4.0: version "0.4.0" resolved "https://registry.npmjs.org/json-schema/-/json-schema-0.4.0.tgz" integrity sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA== +json-stable-stringify-without-jsonify@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz#9db7b59496ad3f3cfef30a75142d2d930ad72651" + integrity sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw== + json-stringify-safe@~5.0.1: version "5.0.1" resolved "https://registry.npmjs.org/json-stringify-safe/-/json-stringify-safe-5.0.1.tgz" integrity sha1-Epai1Y/UXxmg9s4B1lcB4sc1tus= +json5@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/json5/-/json5-1.0.2.tgz#63d98d60f21b313b77c4d6da18bfa69d80e1d593" + integrity sha512-g1MWMLBiz8FKi1e4w0UyVL3w+iJceWAFBAaBnnGKOpNa5f8TLktkbre1+s6oICydWAm+HRUGTmI+//xv2hvXYA== + dependencies: + minimist "^1.2.0" + jsonfile@^6.0.1: version "6.1.0" 
resolved "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz" @@ -703,11 +1544,26 @@ jsprim@^2.0.2: json-schema "0.4.0" verror "1.10.0" +keyv@^4.5.3: + version "4.5.4" + resolved "https://registry.yarnpkg.com/keyv/-/keyv-4.5.4.tgz#a879a99e29452f942439f2a405e3af8b31d4de93" + integrity sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw== + dependencies: + json-buffer "3.0.1" + lazy-ass@^1.6.0: version "1.6.0" resolved "https://registry.npmjs.org/lazy-ass/-/lazy-ass-1.6.0.tgz" integrity sha1-eZllXoZGwX8In90YfRUNMyTVRRM= +levn@^0.4.1: + version "0.4.1" + resolved "https://registry.yarnpkg.com/levn/-/levn-0.4.1.tgz#ae4562c007473b932a6200d403268dd2fffc6ade" + integrity sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ== + dependencies: + prelude-ls "^1.2.1" + type-check "~0.4.0" + listr2@^3.8.3: version "3.13.5" resolved "https://registry.npmjs.org/listr2/-/listr2-3.13.5.tgz" @@ -722,6 +1578,18 @@ listr2@^3.8.3: through "^2.3.8" wrap-ansi "^7.0.0" +locate-path@^6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/locate-path/-/locate-path-6.0.0.tgz#55321eb309febbc59c4801d931a72452a681d286" + integrity sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw== + dependencies: + p-locate "^5.0.0" + +lodash.merge@^4.6.2: + version "4.6.2" + resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a" + integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ== + lodash.once@^4.1.1: version "4.1.1" resolved "https://registry.npmjs.org/lodash.once/-/lodash.once-4.1.1.tgz" @@ -779,13 +1647,18 @@ mimic-fn@^2.1.0: resolved "https://registry.npmjs.org/mimic-fn/-/mimic-fn-2.1.0.tgz" integrity sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg== -minimatch@^3.0.4: +minimatch@^3.0.4, 
minimatch@^3.0.5, minimatch@^3.1.2: version "3.1.2" resolved "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz" integrity sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw== dependencies: brace-expansion "^1.1.7" +minimist@^1.2.0: + version "1.2.8" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.8.tgz#c1a464e7693302e082a075cee0c057741ac4772c" + integrity sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA== + minimist@^1.2.6: version "1.2.7" resolved "https://registry.npmjs.org/minimist/-/minimist-1.2.7.tgz" @@ -801,6 +1674,11 @@ ms@^2.1.1: resolved "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz" integrity sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA== +natural-compare@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/natural-compare/-/natural-compare-1.4.0.tgz#4abebfeed7541f2c27acfb29bdbbd15c8d5ba4f7" + integrity sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw== + npm-run-path@^4.0.0: version "4.0.1" resolved "https://registry.npmjs.org/npm-run-path/-/npm-run-path-4.0.1.tgz" @@ -808,6 +1686,64 @@ npm-run-path@^4.0.0: dependencies: path-key "^3.0.0" +object-inspect@^1.13.1: + version "1.13.1" + resolved "https://registry.yarnpkg.com/object-inspect/-/object-inspect-1.13.1.tgz#b96c6109324ccfef6b12216a956ca4dc2ff94bc2" + integrity sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ== + +object-keys@^1.1.1: + version "1.1.1" + resolved "https://registry.yarnpkg.com/object-keys/-/object-keys-1.1.1.tgz#1c47f272df277f3b1daf061677d9c82e2322c60e" + integrity sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA== + +object.assign@^4.1.2, object.assign@^4.1.5: + version "4.1.5" + resolved 
"https://registry.yarnpkg.com/object.assign/-/object.assign-4.1.5.tgz#3a833f9ab7fdb80fc9e8d2300c803d216d8fdbb0" + integrity sha512-byy+U7gp+FVwmyzKPYhW2h5l3crpmGsxl7X2s8y43IgxvG4g3QZ6CffDtsNQy1WsmZpQbO+ybo0AlW7TY6DcBQ== + dependencies: + call-bind "^1.0.5" + define-properties "^1.2.1" + has-symbols "^1.0.3" + object-keys "^1.1.1" + +object.entries@^1.1.5: + version "1.1.7" + resolved "https://registry.yarnpkg.com/object.entries/-/object.entries-1.1.7.tgz#2b47760e2a2e3a752f39dd874655c61a7f03c131" + integrity sha512-jCBs/0plmPsOnrKAfFQXRG2NFjlhZgjjcBLSmTnEhU8U6vVTsVe8ANeQJCHTl3gSsI4J+0emOoCgoKlmQPMgmA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + +object.fromentries@^2.0.7: + version "2.0.7" + resolved "https://registry.yarnpkg.com/object.fromentries/-/object.fromentries-2.0.7.tgz#71e95f441e9a0ea6baf682ecaaf37fa2a8d7e616" + integrity sha512-UPbPHML6sL8PI/mOqPwsH4G6iyXcCGzLin8KvEPenOZN5lpCNBZZQ+V62vdjB1mQHrmqGQt5/OJzemUA+KJmEA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + +object.groupby@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/object.groupby/-/object.groupby-1.0.2.tgz#494800ff5bab78fd0eff2835ec859066e00192ec" + integrity sha512-bzBq58S+x+uo0VjurFT0UktpKHOZmv4/xePiOA1nbB9pMqpGK7rUPNgf+1YC+7mE+0HzhTMqNUuCqvKhj6FnBw== + dependencies: + array.prototype.filter "^1.0.3" + call-bind "^1.0.5" + define-properties "^1.2.1" + es-abstract "^1.22.3" + es-errors "^1.0.0" + +object.values@^1.1.7: + version "1.1.7" + resolved "https://registry.yarnpkg.com/object.values/-/object.values-1.1.7.tgz#617ed13272e7e1071b43973aa1655d9291b8442a" + integrity sha512-aU6xnDFYT3x17e/f0IiiwlGPTy2jzMySGfUB4fq6z7CV8l85CWHDk5ErhyhpfDHhrOMwGFhSQkhMGHaIotA6Ng== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + once@^1.3.0, once@^1.3.1, once@^1.4.0: version "1.4.0" resolved "https://registry.npmjs.org/once/-/once-1.4.0.tgz" @@ -822,11 
+1758,37 @@ onetime@^5.1.0: dependencies: mimic-fn "^2.1.0" +optionator@^0.9.3: + version "0.9.3" + resolved "https://registry.yarnpkg.com/optionator/-/optionator-0.9.3.tgz#007397d44ed1872fdc6ed31360190f81814e2c64" + integrity sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg== + dependencies: + "@aashutoshrathi/word-wrap" "^1.2.3" + deep-is "^0.1.3" + fast-levenshtein "^2.0.6" + levn "^0.4.1" + prelude-ls "^1.2.1" + type-check "^0.4.0" + ospath@^1.2.2: version "1.2.2" resolved "https://registry.npmjs.org/ospath/-/ospath-1.2.2.tgz" integrity sha1-EnZjl3Sj+O8lcvf+QoDg6kVQwHs= +p-limit@^3.0.2: + version "3.1.0" + resolved "https://registry.yarnpkg.com/p-limit/-/p-limit-3.1.0.tgz#e1daccbe78d0d1388ca18c64fea38e3e57e3706b" + integrity sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ== + dependencies: + yocto-queue "^0.1.0" + +p-locate@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/p-locate/-/p-locate-5.0.0.tgz#83c8315c6785005e3bd021839411c9e110e6d834" + integrity sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw== + dependencies: + p-limit "^3.0.2" + p-map@^4.0.0: version "4.0.0" resolved "https://registry.npmjs.org/p-map/-/p-map-4.0.0.tgz" @@ -834,6 +1796,18 @@ p-map@^4.0.0: dependencies: aggregate-error "^3.0.0" +parent-module@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/parent-module/-/parent-module-1.0.1.tgz#691d2709e78c79fae3a156622452d00762caaaa2" + integrity sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g== + dependencies: + callsites "^3.0.0" + +path-exists@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/path-exists/-/path-exists-4.0.0.tgz#513bdbe2d3b95d7762e8c1137efa195c6c61b5b3" + integrity sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w== + path-is-absolute@^1.0.0: version 
"1.0.1" resolved "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz" @@ -844,6 +1818,11 @@ path-key@^3.0.0, path-key@^3.1.0: resolved "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz" integrity sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q== +path-parse@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/path-parse/-/path-parse-1.0.7.tgz#fbc114b60ca42b30d9daf5858e4bd68bbedb6735" + integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== + pend@~1.2.0: version "1.2.0" resolved "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz" @@ -859,6 +1838,21 @@ pify@^2.2.0: resolved "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz" integrity sha1-7RQaasBDqEnqWISY59yosVMw6Qw= +possible-typed-array-names@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/possible-typed-array-names/-/possible-typed-array-names-1.0.0.tgz#89bb63c6fada2c3e90adc4a647beeeb39cc7bf8f" + integrity sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q== + +prelude-ls@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/prelude-ls/-/prelude-ls-1.2.1.tgz#debc6489d7a6e6b0e7611888cec880337d316396" + integrity sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g== + +prettier@^3.2.5: + version "3.2.5" + resolved "https://registry.yarnpkg.com/prettier/-/prettier-3.2.5.tgz#e52bc3090586e824964a8813b09aba6233b28368" + integrity sha512-3/GWa9aOC0YeD7LUfvOG2NiDyhOWRvt1k+rcKhOuYnMY24iiCphgneUfJDyFXd6rZCAnuLBv6UeAULtrhT/F4A== + pretty-bytes@^5.6.0: version "5.6.0" resolved "https://registry.npmjs.org/pretty-bytes/-/pretty-bytes-5.6.0.tgz" @@ -882,6 +1876,11 @@ pump@^3.0.0: end-of-stream "^1.1.0" once "^1.3.1" +punycode@^2.1.0: + version "2.3.1" + resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.1.tgz#027422e2faec0b25e1549c3e1bd8309b9133b6e5" + integrity 
sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg== + punycode@^2.1.1: version "2.1.1" resolved "https://registry.npmjs.org/punycode/-/punycode-2.1.1.tgz" @@ -892,6 +1891,21 @@ qs@~6.5.2: resolved "https://registry.npmjs.org/qs/-/qs-6.5.3.tgz" integrity sha512-qxXIEh4pCGfHICj1mAJQ2/2XVZkjCDTcEgfoSQxc/fYivUZxTkk7L3bDBJSoNrEzXI17oUO5Dp07ktqE5KzczA== +queue-microtask@^1.2.2: + version "1.2.3" + resolved "https://registry.yarnpkg.com/queue-microtask/-/queue-microtask-1.2.3.tgz#4929228bbc724dfac43e0efb058caf7b6cfb6243" + integrity sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A== + +regexp.prototype.flags@^1.5.2: + version "1.5.2" + resolved "https://registry.yarnpkg.com/regexp.prototype.flags/-/regexp.prototype.flags-1.5.2.tgz#138f644a3350f981a858c44f6bb1a61ff59be334" + integrity sha512-NcDiDkTLuPR+++OCKB0nWafEmhg/Da8aUPLPMQbK+bxKKCm1/S5he+AqYa4PlMCVBalb4/yxIRub6qkEx5yJbw== + dependencies: + call-bind "^1.0.6" + define-properties "^1.2.1" + es-errors "^1.3.0" + set-function-name "^2.0.1" + request-progress@^3.0.0: version "3.0.0" resolved "https://registry.npmjs.org/request-progress/-/request-progress-3.0.0.tgz" @@ -899,6 +1913,20 @@ request-progress@^3.0.0: dependencies: throttleit "^1.0.0" +resolve-from@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6" + integrity sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g== + +resolve@^1.22.4: + version "1.22.8" + resolved "https://registry.yarnpkg.com/resolve/-/resolve-1.22.8.tgz#b6c87a9f2aa06dfab52e3d70ac8cde321fa5a48d" + integrity sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw== + dependencies: + is-core-module "^2.13.0" + path-parse "^1.0.7" + supports-preserve-symlinks-flag "^1.0.0" + restore-cursor@^3.1.0: version "3.1.0" resolved 
"https://registry.npmjs.org/restore-cursor/-/restore-cursor-3.1.0.tgz" @@ -907,18 +1935,30 @@ restore-cursor@^3.1.0: onetime "^5.1.0" signal-exit "^3.0.2" +reusify@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/reusify/-/reusify-1.0.4.tgz#90da382b1e126efc02146e90845a88db12925d76" + integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw== + rfdc@^1.3.0: version "1.3.0" resolved "https://registry.npmjs.org/rfdc/-/rfdc-1.3.0.tgz" integrity sha512-V2hovdzFbOi77/WajaSMXk2OLm+xNIeQdMMuB7icj7bk6zi2F8GGAxigcnDFpJHbNyNcgyJDiP+8nOrY5cZGrA== -rimraf@^3.0.0: +rimraf@^3.0.0, rimraf@^3.0.2: version "3.0.2" resolved "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz" integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA== dependencies: glob "^7.1.3" +run-parallel@^1.1.9: + version "1.2.0" + resolved "https://registry.yarnpkg.com/run-parallel/-/run-parallel-1.2.0.tgz#66d1368da7bdf921eb9d95bd1a9229e7f21a43ee" + integrity sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA== + dependencies: + queue-microtask "^1.2.2" + rxjs@^7.4.0: version "7.4.0" resolved "https://registry.npmjs.org/rxjs/-/rxjs-7.4.0.tgz" @@ -926,16 +1966,40 @@ rxjs@^7.4.0: dependencies: tslib "~2.1.0" +safe-array-concat@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/safe-array-concat/-/safe-array-concat-1.1.0.tgz#8d0cae9cb806d6d1c06e08ab13d847293ebe0692" + integrity sha512-ZdQ0Jeb9Ofti4hbt5lX3T2JcAamT9hfzYU1MNB+z/jaEbB6wfFfPIR/zEORmZqobkCCJhSjodobH6WHNmJ97dg== + dependencies: + call-bind "^1.0.5" + get-intrinsic "^1.2.2" + has-symbols "^1.0.3" + isarray "^2.0.5" + safe-buffer@^5.0.1, safe-buffer@^5.1.2: version "5.2.1" resolved "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz" integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== +safe-regex-test@^1.0.3: + version 
"1.0.3" + resolved "https://registry.yarnpkg.com/safe-regex-test/-/safe-regex-test-1.0.3.tgz#a5b4c0f06e0ab50ea2c395c14d8371232924c377" + integrity sha512-CdASjNJPvRa7roO6Ra/gLYBTzYzzPyyBXxIMdGW3USQLyjWEls2RgW5UBTXaQVp+OrpeCK3bLem8smtmheoRuw== + dependencies: + call-bind "^1.0.6" + es-errors "^1.3.0" + is-regex "^1.1.4" + safer-buffer@^2.0.2, safer-buffer@^2.1.0, safer-buffer@~2.1.0: version "2.1.2" resolved "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz" integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== +semver@^6.3.0, semver@^6.3.1: + version "6.3.1" + resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4" + integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA== + semver@^7.3.2: version "7.3.5" resolved "https://registry.npmjs.org/semver/-/semver-7.3.5.tgz" @@ -943,6 +2007,28 @@ semver@^7.3.2: dependencies: lru-cache "^6.0.0" +set-function-length@^1.2.1: + version "1.2.1" + resolved "https://registry.yarnpkg.com/set-function-length/-/set-function-length-1.2.1.tgz#47cc5945f2c771e2cf261c6737cf9684a2a5e425" + integrity sha512-j4t6ccc+VsKwYHso+kElc5neZpjtq9EnRICFZtWyBsLojhmeF/ZBd/elqm22WJh/BziDe/SBiOeAt0m2mfLD0g== + dependencies: + define-data-property "^1.1.2" + es-errors "^1.3.0" + function-bind "^1.1.2" + get-intrinsic "^1.2.3" + gopd "^1.0.1" + has-property-descriptors "^1.0.1" + +set-function-name@^2.0.1: + version "2.0.2" + resolved "https://registry.yarnpkg.com/set-function-name/-/set-function-name-2.0.2.tgz#16a705c5a0dc2f5e638ca96d8a8cd4e1c2b90985" + integrity sha512-7PGFlmtwsEADb0WYyvCMa1t+yke6daIG4Wirafur5kcf+MhUnPms1UeR0CKQdTZD81yESwMHbtn+TR+dMviakQ== + dependencies: + define-data-property "^1.1.4" + es-errors "^1.3.0" + functions-have-names "^1.2.3" + has-property-descriptors "^1.0.2" + shebang-command@^2.0.0: version "2.0.0" resolved 
"https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz" @@ -955,6 +2041,16 @@ shebang-regex@^3.0.0: resolved "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz" integrity sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A== +side-channel@^1.0.4: + version "1.0.5" + resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.5.tgz#9a84546599b48909fb6af1211708d23b1946221b" + integrity sha512-QcgiIWV4WV7qWExbN5llt6frQB/lBven9pqliLXfGPB+K9ZYXxDozp0wLkHS24kWCm+6YXH/f0HhnObZnZOBnQ== + dependencies: + call-bind "^1.0.6" + es-errors "^1.3.0" + get-intrinsic "^1.2.4" + object-inspect "^1.13.1" + signal-exit@^3.0.2: version "3.0.6" resolved "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.6.tgz" @@ -1002,6 +2098,33 @@ string-width@^4.1.0, string-width@^4.2.0: is-fullwidth-code-point "^3.0.0" strip-ansi "^6.0.1" +string.prototype.trim@^1.2.8: + version "1.2.8" + resolved "https://registry.yarnpkg.com/string.prototype.trim/-/string.prototype.trim-1.2.8.tgz#f9ac6f8af4bd55ddfa8895e6aea92a96395393bd" + integrity sha512-lfjY4HcixfQXOfaqCvcBuOIapyaroTXhbkfJN3gcB1OtyupngWK4sEET9Knd0cXd28kTUqu/kHoV4HKSJdnjiQ== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + +string.prototype.trimend@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/string.prototype.trimend/-/string.prototype.trimend-1.0.7.tgz#1bb3afc5008661d73e2dc015cd4853732d6c471e" + integrity sha512-Ni79DqeB72ZFq1uH/L6zJ+DKZTkOtPIHovb3YZHQViE+HDouuU4mBrLOLDn5Dde3RF8qw5qVETEjhu9locMLvA== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + +string.prototype.trimstart@^1.0.7: + version "1.0.7" + resolved "https://registry.yarnpkg.com/string.prototype.trimstart/-/string.prototype.trimstart-1.0.7.tgz#d4cdb44b83a4737ffbac2d406e405d43d0184298" + integrity 
sha512-NGhtDFu3jCEm7B4Fy0DpLewdJQOZcQ0rGbwQ/+stjnrp2i+rlKeCvos9hOIeCmqwratM47OBxY7uFZzjxHXmrg== + dependencies: + call-bind "^1.0.2" + define-properties "^1.2.0" + es-abstract "^1.22.1" + strip-ansi@^6.0.0, strip-ansi@^6.0.1: version "6.0.1" resolved "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz" @@ -1009,11 +2132,21 @@ strip-ansi@^6.0.0, strip-ansi@^6.0.1: dependencies: ansi-regex "^5.0.1" +strip-bom@^3.0.0: + version "3.0.0" + resolved "https://registry.yarnpkg.com/strip-bom/-/strip-bom-3.0.0.tgz#2334c18e9c759f7bdd56fdef7e9ae3d588e68ed3" + integrity sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA== + strip-final-newline@^2.0.0: version "2.0.0" resolved "https://registry.npmjs.org/strip-final-newline/-/strip-final-newline-2.0.0.tgz" integrity sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA== +strip-json-comments@^3.1.1: + version "3.1.1" + resolved "https://registry.yarnpkg.com/strip-json-comments/-/strip-json-comments-3.1.1.tgz#31f1281b3832630434831c310c01cccda8cbe006" + integrity sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig== + supports-color@^7.1.0: version "7.2.0" resolved "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz" @@ -1028,6 +2161,16 @@ supports-color@^8.1.1: dependencies: has-flag "^4.0.0" +supports-preserve-symlinks-flag@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" + integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== + +text-table@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/text-table/-/text-table-0.2.0.tgz#7f5ee823ae805207c00af2df4a84ec3fcfa570b4" + integrity sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw== + 
throttleit@^1.0.0: version "1.0.0" resolved "https://registry.npmjs.org/throttleit/-/throttleit-1.0.0.tgz" @@ -1053,6 +2196,16 @@ tough-cookie@~2.5.0: psl "^1.1.28" punycode "^2.1.1" +tsconfig-paths@^3.15.0: + version "3.15.0" + resolved "https://registry.yarnpkg.com/tsconfig-paths/-/tsconfig-paths-3.15.0.tgz#5299ec605e55b1abb23ec939ef15edaf483070d4" + integrity sha512-2Ac2RgzDe/cn48GvOe3M+o82pEFewD3UPbyoUHHdKasHwJKjds4fLXWf/Ux5kATBKN20oaFGu+jbElp1pos0mg== + dependencies: + "@types/json5" "^0.0.29" + json5 "^1.0.2" + minimist "^1.2.6" + strip-bom "^3.0.0" + tslib@~2.1.0: version "2.1.0" resolved "https://registry.npmjs.org/tslib/-/tslib-2.1.0.tgz" @@ -1070,11 +2223,77 @@ tweetnacl@^0.14.3, tweetnacl@~0.14.0: resolved "https://registry.npmjs.org/tweetnacl/-/tweetnacl-0.14.5.tgz" integrity sha1-WuaBd/GS1EViadEIr6k/+HQ/T2Q= +type-check@^0.4.0, type-check@~0.4.0: + version "0.4.0" + resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1" + integrity sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew== + dependencies: + prelude-ls "^1.2.1" + +type-fest@^0.20.2: + version "0.20.2" + resolved "https://registry.yarnpkg.com/type-fest/-/type-fest-0.20.2.tgz#1bf207f4b28f91583666cb5fbd327887301cd5f4" + integrity sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ== + type-fest@^0.21.3: version "0.21.3" resolved "https://registry.npmjs.org/type-fest/-/type-fest-0.21.3.tgz" integrity sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w== +typed-array-buffer@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/typed-array-buffer/-/typed-array-buffer-1.0.2.tgz#1867c5d83b20fcb5ccf32649e5e2fc7424474ff3" + integrity sha512-gEymJYKZtKXzzBzM4jqa9w6Q1Jjm7x2d+sh19AdsD4wqnMPDYyvwpsIc2Q/835kHuo3BEQ7CjelGhfTsoBb2MQ== + dependencies: + call-bind "^1.0.7" + es-errors "^1.3.0" + is-typed-array 
"^1.1.13" + +typed-array-byte-length@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/typed-array-byte-length/-/typed-array-byte-length-1.0.1.tgz#d92972d3cff99a3fa2e765a28fcdc0f1d89dec67" + integrity sha512-3iMJ9q0ao7WE9tWcaYKIptkNBuOIcZCCT0d4MRvuuH88fEoEH62IuQe0OtraD3ebQEoTRk8XCBoknUNc1Y67pw== + dependencies: + call-bind "^1.0.7" + for-each "^0.3.3" + gopd "^1.0.1" + has-proto "^1.0.3" + is-typed-array "^1.1.13" + +typed-array-byte-offset@^1.0.0: + version "1.0.2" + resolved "https://registry.yarnpkg.com/typed-array-byte-offset/-/typed-array-byte-offset-1.0.2.tgz#f9ec1acb9259f395093e4567eb3c28a580d02063" + integrity sha512-Ous0vodHa56FviZucS2E63zkgtgrACj7omjwd/8lTEMEPFFyjfixMZ1ZXenpgCFBBt4EC1J2XsyVS2gkG0eTFA== + dependencies: + available-typed-arrays "^1.0.7" + call-bind "^1.0.7" + for-each "^0.3.3" + gopd "^1.0.1" + has-proto "^1.0.3" + is-typed-array "^1.1.13" + +typed-array-length@^1.0.4: + version "1.0.5" + resolved "https://registry.yarnpkg.com/typed-array-length/-/typed-array-length-1.0.5.tgz#57d44da160296d8663fd63180a1802ebf25905d5" + integrity sha512-yMi0PlwuznKHxKmcpoOdeLwxBoVPkqZxd7q2FgMkmD3bNwvF5VW0+UlUQ1k1vmktTu4Yu13Q0RIxEP8+B+wloA== + dependencies: + call-bind "^1.0.7" + for-each "^0.3.3" + gopd "^1.0.1" + has-proto "^1.0.3" + is-typed-array "^1.1.13" + possible-typed-array-names "^1.0.0" + +unbox-primitive@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/unbox-primitive/-/unbox-primitive-1.0.2.tgz#29032021057d5e6cdbd08c5129c226dff8ed6f9e" + integrity sha512-61pPlCD9h51VoreyJ0BReideM3MDKMKnh6+V9L08331ipq6Q8OFXZYiqP6n/tbHx4s5I9uRhcye6BrbkizkBDw== + dependencies: + call-bind "^1.0.2" + has-bigints "^1.0.2" + has-symbols "^1.0.3" + which-boxed-primitive "^1.0.2" + universalify@^2.0.0: version "2.0.0" resolved "https://registry.npmjs.org/universalify/-/universalify-2.0.0.tgz" @@ -1085,6 +2304,13 @@ untildify@^4.0.0: resolved "https://registry.npmjs.org/untildify/-/untildify-4.0.0.tgz" integrity 
sha512-KK8xQ1mkzZeg9inewmFVDNkg3l5LUhoq9kN6iWYB/CC9YMG8HA+c1Q8HwDe6dEX7kErrEVNVBO3fWsVq5iDgtw== +uri-js@^4.2.2: + version "4.4.1" + resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.4.1.tgz#9b1a52595225859e55f669d928f88c6c57f2a77e" + integrity sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg== + dependencies: + punycode "^2.1.0" + uuid@^8.3.2: version "8.3.2" resolved "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz" @@ -1099,6 +2325,28 @@ verror@1.10.0: core-util-is "1.0.2" extsprintf "^1.2.0" +which-boxed-primitive@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz#13757bc89b209b049fe5d86430e21cf40a89a8e6" + integrity sha512-bwZdv0AKLpplFY2KZRX6TvyuN7ojjr7lwkg6ml0roIy9YeuSr7JS372qlNW18UQYzgYK9ziGcerWqZOmEn9VNg== + dependencies: + is-bigint "^1.0.1" + is-boolean-object "^1.1.0" + is-number-object "^1.0.4" + is-string "^1.0.5" + is-symbol "^1.0.3" + +which-typed-array@^1.1.14: + version "1.1.14" + resolved "https://registry.yarnpkg.com/which-typed-array/-/which-typed-array-1.1.14.tgz#1f78a111aee1e131ca66164d8bdc3ab062c95a06" + integrity sha512-VnXFiIW8yNn9kIHN88xvZ4yOWchftKDsRJ8fEPacX/wl1lOvBrhsJ/OeJCXq7B0AaijRuqgzSKalJoPk+D8MPg== + dependencies: + available-typed-arrays "^1.0.6" + call-bind "^1.0.5" + for-each "^0.3.3" + gopd "^1.0.1" + has-tostringtag "^1.0.1" + which@^2.0.1: version "2.0.2" resolved "https://registry.npmjs.org/which/-/which-2.0.2.tgz" @@ -1141,3 +2389,8 @@ yauzl@^2.10.0: dependencies: buffer-crc32 "~0.2.3" fd-slicer "~1.1.0" + +yocto-queue@^0.1.0: + version "0.1.0" + resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-0.1.0.tgz#0294eb3dee05028d31ee1a5fa2c556a6aaf10a1b" + integrity sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==