diff --git a/.github/workflows/metadata-model.yml b/.github/workflows/metadata-model.yml index 632e6ac35d673e..6f62284afcc172 100644 --- a/.github/workflows/metadata-model.yml +++ b/.github/workflows/metadata-model.yml @@ -20,11 +20,9 @@ jobs: steps: - name: Check whether upload to datahub is enabled id: publish - env: - ENABLE_PUBLISH: ${{ secrets.DataHubToken }} run: | - echo "Enable publish: ${{ env.ENABLE_PUBLISH != '' }}" - echo "publish=${{ env.ENABLE_PUBLISH != '' }}" >> $GITHUB_OUTPUT + echo "Enable publish: ${{ github.repository == 'datahub-project/datahub' }}" + echo "publish=${{ github.repository == 'datahub-project/datahub' }}" >> $GITHUB_OUTPUT metadata-ingestion-docgen: runs-on: ubuntu-latest needs: setup diff --git a/build.gradle b/build.gradle index 284092e2b14f49..e4fd70a99e6434 100644 --- a/build.gradle +++ b/build.gradle @@ -195,7 +195,7 @@ project.ext.externalDependency = [ 'kafkaAvroSerde': "io.confluent:kafka-streams-avro-serde:$kafkaVersion", 'kafkaAvroSerializer': 'io.confluent:kafka-avro-serializer:5.1.4', 'kafkaClients': "org.apache.kafka:kafka-clients:$kafkaVersion-ccs", - 'snappy': 'org.xerial.snappy:snappy-java:1.1.10.4', + 'snappy': 'org.xerial.snappy:snappy-java:1.1.10.5', 'logbackClassic': "ch.qos.logback:logback-classic:$logbackClassic", 'logbackClassicJava8' : "ch.qos.logback:logback-classic:$logbackClassicJava8", 'slf4jApi': "org.slf4j:slf4j-api:$slf4jVersion", diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 59335ba605a741..b15db80a8487ae 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -56,6 +56,7 @@ import com.linkedin.datahub.graphql.generated.DataJobInputOutput; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.DataQualityContract; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.DatasetStatsSummary; @@ -173,6 +174,8 @@ import com.linkedin.datahub.graphql.resolvers.embed.UpdateEmbedResolver; import com.linkedin.datahub.graphql.resolvers.entity.EntityExistsResolver; import com.linkedin.datahub.graphql.resolvers.entity.EntityPrivilegesResolver; +import com.linkedin.datahub.graphql.resolvers.entity.versioning.LinkAssetVersionResolver; +import com.linkedin.datahub.graphql.resolvers.entity.versioning.UnlinkAssetVersionResolver; import com.linkedin.datahub.graphql.resolvers.form.BatchAssignFormResolver; import com.linkedin.datahub.graphql.resolvers.form.BatchRemoveFormResolver; import com.linkedin.datahub.graphql.resolvers.form.CreateDynamicFormAssignmentResolver; @@ -346,6 +349,7 @@ import com.linkedin.datahub.graphql.types.datajob.DataJobType; import com.linkedin.datahub.graphql.types.dataplatform.DataPlatformType; import com.linkedin.datahub.graphql.types.dataplatforminstance.DataPlatformInstanceType; +import com.linkedin.datahub.graphql.types.dataprocessinst.DataProcessInstanceType; import com.linkedin.datahub.graphql.types.dataprocessinst.mappers.DataProcessInstanceRunEventMapper; import com.linkedin.datahub.graphql.types.dataproduct.DataProductType; import com.linkedin.datahub.graphql.types.dataset.DatasetType; @@ -389,6 +393,7 
@@ import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -474,6 +479,7 @@ public class GmsGraphQLEngine { private final RestrictedService restrictedService; private ConnectionService connectionService; private AssertionService assertionService; + private final EntityVersioningService entityVersioningService; private final BusinessAttributeService businessAttributeService; private final FeatureFlags featureFlags; @@ -530,6 +536,7 @@ public class GmsGraphQLEngine { private final FormType formType; private final IncidentType incidentType; private final RestrictedType restrictedType; + private final DataProcessInstanceType dataProcessInstanceType; private final int graphQLQueryComplexityLimit; private final int graphQLQueryDepthLimit; @@ -596,6 +603,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.restrictedService = args.restrictedService; this.connectionService = args.connectionService; this.assertionService = args.assertionService; + this.entityVersioningService = args.entityVersioningService; this.businessAttributeService = args.businessAttributeService; this.ingestionConfiguration = Objects.requireNonNull(args.ingestionConfiguration); @@ -649,6 +657,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.formType = new FormType(entityClient); this.incidentType = new IncidentType(entityClient); this.restrictedType = new RestrictedType(entityClient, restrictedService); + this.dataProcessInstanceType = new DataProcessInstanceType(entityClient, featureFlags); this.graphQLQueryComplexityLimit = args.graphQLQueryComplexityLimit; this.graphQLQueryDepthLimit = args.graphQLQueryDepthLimit; @@ -699,7 +708,8 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { formType, incidentType, restrictedType, - businessAttributeType)); + businessAttributeType, + dataProcessInstanceType)); this.loadableTypes = new ArrayList<>(entityTypes); // Extend loadable types with types from the plugins // This allows us to offer search and browse capabilities out of the box for @@ -1024,6 +1034,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("tag", getResolver(tagType)) .dataFetcher("dataFlow", getResolver(dataFlowType)) .dataFetcher("dataJob", getResolver(dataJobType)) + .dataFetcher("dataProcessInstance", getResolver(dataProcessInstanceType)) .dataFetcher("glossaryTerm", getResolver(glossaryTermType)) .dataFetcher("glossaryNode", getResolver(glossaryNodeType)) .dataFetcher("domain", getResolver((domainType))) @@ -1386,6 +1397,16 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { "removeBusinessAttribute", new RemoveBusinessAttributeResolver(this.entityService)); } + if (featureFlags.isEntityVersioning()) { + typeWiring + .dataFetcher( + "linkAssetVersion", + new LinkAssetVersionResolver(this.entityVersioningService, this.featureFlags)) + .dataFetcher( + "unlinkAssetVersion", + new UnlinkAssetVersionResolver( + this.entityVersioningService, this.featureFlags)); + } return typeWiring; }); } @@ -3058,6 +3079,35 @@ private void configureDataProcessInstanceResolvers(final RuntimeWiring.Builder b "DataProcessInstance", typeWiring -> typeWiring + 
.dataFetcher( + "dataPlatformInstance", + new LoadableTypeResolver<>( + dataPlatformInstanceType, + (env) -> { + final DataProcessInstance dataProcessInstance = env.getSource(); + return dataProcessInstance.getDataPlatformInstance() != null + ? dataProcessInstance.getDataPlatformInstance().getUrn() + : null; + })) + .dataFetcher( + "platform", + new LoadableTypeResolver<>( + dataPlatformType, + (env) -> { + final DataProcessInstance dataProcessInstance = env.getSource(); + return dataProcessInstance.getPlatform() != null + ? dataProcessInstance.getPlatform().getUrn() + : null; + })) + .dataFetcher("parentContainers", new ParentContainersResolver(entityClient)) + .dataFetcher( + "container", + new LoadableTypeResolver<>( + containerType, + (env) -> { + final DataProcessInstance dpi = env.getSource(); + return dpi.getContainer() != null ? dpi.getContainer().getUrn() : null; + })) .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) .dataFetcher( "lineage", diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java index f6ab3a603dbb7b..131f4e87637807 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java @@ -21,6 +21,7 @@ import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.connection.ConnectionService; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -88,6 +89,7 @@ public class GmsGraphQLEngineArgs { BusinessAttributeService businessAttributeService; ConnectionService connectionService; AssertionService assertionService; + EntityVersioningService entityVersioningService; // any fork specific args should go below this line } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java new file mode 100644 index 00000000000000..69e049af1e87b7 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolver.java @@ -0,0 +1,88 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; + +import com.datahub.authorization.AuthUtil; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; +import com.linkedin.datahub.graphql.exception.AuthorizationException; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import 
com.linkedin.metadata.entity.versioning.VersionPropertiesInput;
+import graphql.schema.DataFetcher;
+import graphql.schema.DataFetchingEnvironment;
+import io.datahubproject.metadata.context.OperationContext;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * Currently only supports linking the latest version, but may be modified later to support inserts
+ */
+public class LinkAssetVersionResolver implements DataFetcher<CompletableFuture<String>> {
+
+  private final EntityVersioningService entityVersioningService;
+  private final FeatureFlags featureFlags;
+
+  public LinkAssetVersionResolver(
+      EntityVersioningService entityVersioningService, FeatureFlags featureFlags) {
+    this.entityVersioningService = entityVersioningService;
+    this.featureFlags = featureFlags;
+  }
+
+  @Override
+  public CompletableFuture<String> get(DataFetchingEnvironment environment) throws Exception {
+    final QueryContext context = environment.getContext();
+    final LinkVersionInput input =
+        bindArgument(environment.getArgument("input"), LinkVersionInput.class);
+    if (!featureFlags.isEntityVersioning()) {
+      throw new IllegalAccessError(
+          "Entity Versioning is not configured, please enable before attempting to use this feature.");
+    }
+    Urn versionSetUrn = UrnUtils.getUrn(input.getVersionSet());
+    if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) {
+      throw new IllegalArgumentException(
+          String.format("Version Set urn %s must be of type Version Set.", input.getVersionSet()));
+    }
+    Urn entityUrn = UrnUtils.getUrn(input.getLinkedEntity());
+    OperationContext opContext = context.getOperationContext();
+    if (!AuthUtil.isAPIAuthorizedEntityUrns(
+        opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) {
+      throw new AuthorizationException(
+          String.format(
+              "%s is unauthorized to %s entities %s and %s",
+              opContext.getAuthentication().getActor().toUrnStr(),
+              UPDATE,
+              input.getVersionSet(),
+              input.getLinkedEntity()));
+    }
+    VersionPropertiesInput versionPropertiesInput =
+        new VersionPropertiesInput(
+            input.getComment(),
+            input.getVersion(),
+            input.getSourceTimestamp(),
+            input.getSourceCreator());
+    return GraphQLConcurrencyUtils.supplyAsync(
+        () -> {
+          List<IngestResult> linkResults =
+              entityVersioningService.linkLatestVersion(
+                  opContext, versionSetUrn, entityUrn, versionPropertiesInput);
+
+          return linkResults.stream()
+              .filter(
+                  ingestResult -> input.getLinkedEntity().equals(ingestResult.getUrn().toString()))
+              .map(ingestResult -> ingestResult.getUrn().toString())
+              .findAny()
+              .orElse(StringUtils.EMPTY);
+        },
+        this.getClass().getSimpleName(),
+        "get");
+  }
+}
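A note for reviewers: the resolver above completes with the linked entity's URN, or an empty string when the versioning service returns no ingest result for that URN. Below is a minimal sketch of exercising the new mutation over HTTP; the gateway URL, bearer token, and URN values are illustrative assumptions, not part of this change. Because linkAssetVersion resolves to a plain String scalar, the mutation needs no selection set.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class LinkAssetVersionExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical endpoint, token, and URNs; substitute real values.
    String query =
        "mutation { linkAssetVersion(input: {"
            + " versionSet: \"urn:li:versionSet:my-set\","
            + " linkedEntity: \"urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)\","
            + " version: \"v1\", comment: \"initial link\" }) }";
    String body = "{\"query\": \"" + query.replace("\"", "\\\"") + "\"}";
    HttpRequest request =
        HttpRequest.newBuilder()
            .uri(URI.create("http://localhost:8080/api/graphql"))
            .header("Content-Type", "application/json")
            .header("Authorization", "Bearer <personal-access-token>")
            .POST(HttpRequest.BodyPublishers.ofString(body))
            .build();
    HttpResponse<String> response =
        HttpClient.newHttpClient().send(request, HttpResponse.BodyHandlers.ofString());
    // On success, data.linkAssetVersion carries the linked entity URN.
    System.out.println(response.body());
  }
}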
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java
new file mode 100644
index 00000000000000..3d5027a0d668ac
--- /dev/null
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolver.java
@@ -0,0 +1,67 @@
+package com.linkedin.datahub.graphql.resolvers.entity.versioning;
+
+import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument;
+import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME;
+import static com.linkedin.metadata.authorization.ApiOperation.UPDATE;
+
+import com.datahub.authorization.AuthUtil;
+import com.google.common.collect.ImmutableSet;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils;
+import com.linkedin.datahub.graphql.exception.AuthorizationException;
+import com.linkedin.datahub.graphql.featureflags.FeatureFlags;
+import com.linkedin.datahub.graphql.generated.UnlinkVersionInput;
+import com.linkedin.metadata.entity.versioning.EntityVersioningService;
+import graphql.schema.DataFetcher;
+import graphql.schema.DataFetchingEnvironment;
+import io.datahubproject.metadata.context.OperationContext;
+import java.util.concurrent.CompletableFuture;
+
+public class UnlinkAssetVersionResolver implements DataFetcher<CompletableFuture<Boolean>> {
+
+  private final EntityVersioningService entityVersioningService;
+  private final FeatureFlags featureFlags;
+
+  public UnlinkAssetVersionResolver(
+      EntityVersioningService entityVersioningService, FeatureFlags featureFlags) {
+    this.entityVersioningService = entityVersioningService;
+    this.featureFlags = featureFlags;
+  }
+
+  @Override
+  public CompletableFuture<Boolean> get(DataFetchingEnvironment environment) throws Exception {
+    if (!featureFlags.isEntityVersioning()) {
+      throw new IllegalAccessError(
+          "Entity Versioning is not configured, please enable before attempting to use this feature.");
+    }
+    final QueryContext context = environment.getContext();
+    final UnlinkVersionInput input =
+        bindArgument(environment.getArgument("input"), UnlinkVersionInput.class);
+    Urn versionSetUrn = UrnUtils.getUrn(input.getVersionSet());
+    if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) {
+      throw new IllegalArgumentException(
+          String.format("Version Set urn %s must be of type Version Set.", input.getVersionSet()));
+    }
+    Urn entityUrn = UrnUtils.getUrn(input.getUnlinkedEntity());
+    OperationContext opContext = context.getOperationContext();
+    if (!AuthUtil.isAPIAuthorizedEntityUrns(
+        opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) {
+      throw new AuthorizationException(
+          String.format(
+              "%s is unauthorized to %s entities %s and %s",
+              opContext.getAuthentication().getActor(),
+              UPDATE,
+              input.getVersionSet(),
+              input.getUnlinkedEntity()));
+    }
+    return GraphQLConcurrencyUtils.supplyAsync(
+        () -> {
+          entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn);
+          return true;
+        },
+        this.getClass().getSimpleName(),
+        "get");
+  }
+}
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java
index 4345819867617b..ab3127a3ae232b 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java
@@ -1,6 +1,7 @@
 package com.linkedin.datahub.graphql.types.common.mappers;
 
 import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.generated.DataPlatform;
 import com.linkedin.datahub.graphql.generated.DataPlatformInstance;
 import com.linkedin.datahub.graphql.generated.EntityType;
 import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
@@ -28,6 +29,11 @@ public DataPlatformInstance apply(
       result.setType(EntityType.DATA_PLATFORM_INSTANCE);
       result.setUrn(input.getInstance().toString());
     }
+    result.setPlatform(
+
DataPlatform.builder() + .setUrn(input.getPlatform().toString()) + .setType(EntityType.DATA_PLATFORM) + .build()); return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java new file mode 100644 index 00000000000000..58f78b146b406c --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java @@ -0,0 +1,24 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import com.linkedin.common.TimeStamp; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AuditStamp; +import javax.annotation.Nullable; + +public class TimeStampToAuditStampMapper { + + public static final TimeStampToAuditStampMapper INSTANCE = new TimeStampToAuditStampMapper(); + + public static AuditStamp map( + @Nullable final QueryContext context, @Nullable final TimeStamp input) { + if (input == null) { + return null; + } + final AuditStamp result = new AuditStamp(); + result.setTime(input.getTime()); + if (input.hasActor()) { + result.setActor(input.getActor().toString()); + } + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java index 1988cafc486c18..eae33e6da2e56d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java @@ -18,6 +18,7 @@ import com.linkedin.datahub.graphql.generated.DataJob; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.DataProduct; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.Domain; @@ -225,6 +226,11 @@ public Entity apply(@Nullable QueryContext context, Urn input) { ((BusinessAttribute) partialEntity).setUrn(input.toString()); ((BusinessAttribute) partialEntity).setType(EntityType.BUSINESS_ATTRIBUTE); } + if (input.getEntityType().equals(DATA_PROCESS_INSTANCE_ENTITY_NAME)) { + partialEntity = new DataProcessInstance(); + ((DataProcessInstance) partialEntity).setUrn(input.toString()); + ((DataProcessInstance) partialEntity).setType(EntityType.DATA_PROCESS_INSTANCE); + } return partialEntity; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java new file mode 100644 index 00000000000000..eeaaaa96f51704 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java @@ -0,0 +1,102 @@ +package com.linkedin.datahub.graphql.types.dataprocessinst; + +import static com.linkedin.metadata.Constants.*; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import 
com.linkedin.datahub.graphql.featureflags.FeatureFlags;
+import com.linkedin.datahub.graphql.generated.DataProcessInstance;
+import com.linkedin.datahub.graphql.generated.Entity;
+import com.linkedin.datahub.graphql.generated.EntityType;
+import com.linkedin.datahub.graphql.types.dataprocessinst.mappers.DataProcessInstanceMapper;
+import com.linkedin.entity.EntityResponse;
+import com.linkedin.entity.client.EntityClient;
+import graphql.execution.DataFetcherResult;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import javax.annotation.Nonnull;
+import lombok.RequiredArgsConstructor;
+
+@RequiredArgsConstructor
+public class DataProcessInstanceType
+    implements com.linkedin.datahub.graphql.types.EntityType<DataProcessInstance> {
+
+  public static final Set<String> ASPECTS_TO_FETCH =
+      ImmutableSet.of(
+          DATA_PROCESS_INSTANCE_KEY_ASPECT_NAME,
+          DATA_PLATFORM_INSTANCE_ASPECT_NAME,
+          DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME,
+          DATA_PROCESS_INSTANCE_INPUT_ASPECT_NAME,
+          DATA_PROCESS_INSTANCE_OUTPUT_ASPECT_NAME,
+          DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME,
+          TEST_RESULTS_ASPECT_NAME,
+          DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME,
+          ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME,
+          SUB_TYPES_ASPECT_NAME,
+          CONTAINER_ASPECT_NAME);
+
+  private final EntityClient _entityClient;
+  private final FeatureFlags _featureFlags;
+
+  @Override
+  public EntityType type() {
+    return EntityType.DATA_PROCESS_INSTANCE;
+  }
+
+  @Override
+  public Function<Entity, String> getKeyProvider() {
+    return Entity::getUrn;
+  }
+
+  @Override
+  public Class<DataProcessInstance> objectClass() {
+    return DataProcessInstance.class;
+  }
+
+  @Override
+  public List<DataFetcherResult<DataProcessInstance>> batchLoad(
+      @Nonnull List<String> urns, @Nonnull QueryContext context) throws Exception {
+    final List<Urn> dataProcessInstanceUrns =
+        urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList());
+
+    try {
+      Map<Urn, EntityResponse> entities = new HashMap<>();
+      if (_featureFlags.isDataProcessInstanceEntityEnabled()) {
+        entities =
+            _entityClient.batchGetV2(
+                context.getOperationContext(),
+                DATA_PROCESS_INSTANCE_ENTITY_NAME,
+                new HashSet<>(dataProcessInstanceUrns),
+                ASPECTS_TO_FETCH);
+      }
+
+      final List<EntityResponse> gmsResults = new ArrayList<>();
+      for (Urn urn : dataProcessInstanceUrns) {
+        if (_featureFlags.isDataProcessInstanceEntityEnabled()) {
+          gmsResults.add(entities.getOrDefault(urn, null));
+        }
+      }
+
+      return gmsResults.stream()
+          .map(
+              gmsResult ->
+                  gmsResult == null
+                      ? null
+                      : DataFetcherResult.<DataProcessInstance>newResult()
+                          .data(DataProcessInstanceMapper.map(context, gmsResult))
+                          .build())
+          .collect(Collectors.toList());
+
+    } catch (Exception e) {
+      throw new RuntimeException("Failed to load Data Process Instance entity", e);
+    }
+  }
+}
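A note on the loader contract above: results come back positionally aligned with the requested URNs, with a null slot wherever the backend returned no EntityResponse, and as an empty list when the feature flag is off (the per-URN loop then adds nothing). A test-shaped sketch of that contract, reusing the Mockito/TestNG helpers the tests later in this diff already use; the test name is hypothetical, and it leans on Mockito's default answer returning an empty map from the unstubbed batchGetV2:

@Test
public void testBatchLoadMissingUrnYieldsNullSlot() throws Exception {
  EntityClient client = Mockito.mock(EntityClient.class); // batchGetV2 defaults to an empty map
  FeatureFlags flags = Mockito.mock(FeatureFlags.class);
  Mockito.when(flags.isDataProcessInstanceEntityEnabled()).thenReturn(true);

  DataProcessInstanceType type = new DataProcessInstanceType(client, flags);
  List<DataFetcherResult<DataProcessInstance>> results =
      type.batchLoad(ImmutableList.of("urn:li:dataProcessInstance:id-1"), getMockAllowContext());

  // One slot per requested URN; a missing EntityResponse surfaces as null, not an exception.
  assertEquals(results.size(), 1);
  assertNull(results.get(0));
}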
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java
index 7a4d342281fe54..28c9c8936fdbfb 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java
@@ -2,25 +2,38 @@
 
 import static com.linkedin.metadata.Constants.*;
 
+import com.linkedin.common.DataPlatformInstance;
+import com.linkedin.common.SubTypes;
+import com.linkedin.common.urn.Urn;
 import com.linkedin.data.DataMap;
 import com.linkedin.data.template.RecordTemplate;
 import com.linkedin.datahub.graphql.QueryContext;
+import com.linkedin.datahub.graphql.generated.DataPlatform;
 import com.linkedin.datahub.graphql.generated.DataProcessInstance;
 import com.linkedin.datahub.graphql.generated.EntityType;
 import com.linkedin.datahub.graphql.types.common.mappers.AuditStampMapper;
+import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper;
+import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper;
+import com.linkedin.datahub.graphql.types.common.mappers.SubTypesMapper;
 import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper;
 import com.linkedin.datahub.graphql.types.mappers.ModelMapper;
+import com.linkedin.datahub.graphql.types.mlmodel.mappers.MLHyperParamMapper;
+import com.linkedin.datahub.graphql.types.mlmodel.mappers.MLMetricMapper;
 import com.linkedin.dataprocess.DataProcessInstanceProperties;
 import com.linkedin.entity.EntityResponse;
 import com.linkedin.entity.EnvelopedAspectMap;
+import com.linkedin.ml.metadata.MLTrainingRunProperties;
+import java.util.stream.Collectors;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
+import lombok.extern.slf4j.Slf4j;
 
 /**
  * Maps Pegasus {@link RecordTemplate} objects to objects conforming to the GQL schema.
  *
  * <p>

To be replaced by auto-generated mappers implementations */ +@Slf4j public class DataProcessInstanceMapper implements ModelMapper { public static final DataProcessInstanceMapper INSTANCE = new DataProcessInstanceMapper(); @@ -30,6 +43,19 @@ public static DataProcessInstance map( return INSTANCE.apply(context, entityResponse); } + private void mapContainers( + @Nullable final QueryContext context, + @Nonnull DataProcessInstance dataProcessInstance, + @Nonnull DataMap dataMap) { + final com.linkedin.container.Container gmsContainer = + new com.linkedin.container.Container(dataMap); + dataProcessInstance.setContainer( + com.linkedin.datahub.graphql.generated.Container.builder() + .setType(EntityType.CONTAINER) + .setUrn(gmsContainer.getContainer().toString()) + .build()); + } + @Override public DataProcessInstance apply( @Nullable QueryContext context, @Nonnull final EntityResponse entityResponse) { @@ -37,24 +63,97 @@ public DataProcessInstance apply( result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.DATA_PROCESS_INSTANCE); + Urn entityUrn = entityResponse.getUrn(); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); mappingHelper.mapToResult( - context, DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, this::mapDataProcessProperties); + DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, + (dataProcessInstance, dataMap) -> + mapDataProcessProperties(context, dataProcessInstance, dataMap, entityUrn)); + mappingHelper.mapToResult( + ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME, + (dataProcessInstance, dataMap) -> + mapTrainingRunProperties(context, dataProcessInstance, dataMap)); + mappingHelper.mapToResult( + DATA_PLATFORM_INSTANCE_ASPECT_NAME, + (dataProcessInstance, dataMap) -> { + DataPlatformInstance dataPlatformInstance = new DataPlatformInstance(dataMap); + dataProcessInstance.setDataPlatformInstance( + DataPlatformInstanceAspectMapper.map(context, dataPlatformInstance)); + DataPlatform dataPlatform = new DataPlatform(); + dataPlatform.setUrn(dataPlatformInstance.getPlatform().toString()); + dataPlatform.setType(EntityType.DATA_PLATFORM); + dataProcessInstance.setPlatform(dataPlatform); + }); + mappingHelper.mapToResult( + SUB_TYPES_ASPECT_NAME, + (dataProcessInstance, dataMap) -> + dataProcessInstance.setSubTypes(SubTypesMapper.map(context, new SubTypes(dataMap)))); + mappingHelper.mapToResult( + CONTAINER_ASPECT_NAME, + (dataProcessInstance, dataMap) -> mapContainers(context, dataProcessInstance, dataMap)); return mappingHelper.getResult(); } - private void mapDataProcessProperties( + private void mapTrainingRunProperties( @Nonnull QueryContext context, @Nonnull DataProcessInstance dpi, @Nonnull DataMap dataMap) { + MLTrainingRunProperties trainingProperties = new MLTrainingRunProperties(dataMap); + + com.linkedin.datahub.graphql.generated.MLTrainingRunProperties properties = + new com.linkedin.datahub.graphql.generated.MLTrainingRunProperties(); + if (trainingProperties.hasId()) { + properties.setId(trainingProperties.getId()); + } + if (trainingProperties.hasOutputUrls()) { + properties.setOutputUrls( + trainingProperties.getOutputUrls().stream() + .map(url -> url.toString()) + .collect(Collectors.toList())); + } + if (trainingProperties.getHyperParams() != null) { + properties.setHyperParams( + trainingProperties.getHyperParams().stream() + .map(param -> MLHyperParamMapper.map(context, param)) + .collect(Collectors.toList())); + } + if (trainingProperties.getTrainingMetrics() != null) { + 
properties.setTrainingMetrics( + trainingProperties.getTrainingMetrics().stream() + .map(metric -> MLMetricMapper.map(context, metric)) + .collect(Collectors.toList())); + } + if (trainingProperties.hasId()) { + properties.setId(trainingProperties.getId()); + } + dpi.setMlTrainingRunProperties(properties); + } + + private void mapDataProcessProperties( + @Nonnull QueryContext context, + @Nonnull DataProcessInstance dpi, + @Nonnull DataMap dataMap, + @Nonnull Urn entityUrn) { DataProcessInstanceProperties dataProcessInstanceProperties = new DataProcessInstanceProperties(dataMap); + + com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties properties = + new com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties(); + dpi.setName(dataProcessInstanceProperties.getName()); - if (dataProcessInstanceProperties.hasCreated()) { - dpi.setCreated(AuditStampMapper.map(context, dataProcessInstanceProperties.getCreated())); - } + properties.setName(dataProcessInstanceProperties.getName()); if (dataProcessInstanceProperties.hasExternalUrl()) { dpi.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); + properties.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); + } + if (dataProcessInstanceProperties.hasCustomProperties()) { + properties.setCustomProperties( + CustomPropertiesMapper.map( + dataProcessInstanceProperties.getCustomProperties(), entityUrn)); + } + if (dataProcessInstanceProperties.hasCreated()) { + dpi.setCreated(AuditStampMapper.map(context, dataProcessInstanceProperties.getCreated())); } + dpi.setProperties(properties); } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java index 334faf753cb8b5..5b72c2b3c11c5e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java @@ -77,6 +77,9 @@ public class EntityTypeUrnMapper { .put( Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME, "urn:li:entityType:datahub.businessAttribute") + .put( + Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME, + "urn:li:entityType:datahub.dataProcessInstance") .build(); private static final Map ENTITY_TYPE_URN_TO_NAME = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java index 265005c2caa9ee..7b00fe88f2d683 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java @@ -7,6 +7,7 @@ import com.linkedin.datahub.graphql.generated.MLModelGroup; import com.linkedin.datahub.graphql.generated.MLModelProperties; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper; import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; import java.util.stream.Collectors; import javax.annotation.Nonnull; @@ -31,6 +32,15 @@ public MLModelProperties apply( final MLModelProperties result = new MLModelProperties(); 
result.setDate(mlModelProperties.getDate()); + if (mlModelProperties.getName() != null) { + result.setName(mlModelProperties.getName()); + } else { + // backfill name from URN for backwards compatibility + result.setName(entityUrn.getEntityKey().get(1)); // indexed access is safe here + } + result.setCreated(TimeStampToAuditStampMapper.map(context, mlModelProperties.getCreated())); + result.setLastModified( + TimeStampToAuditStampMapper.map(context, mlModelProperties.getLastModified())); result.setDescription(mlModelProperties.getDescription()); if (mlModelProperties.getExternalUrl() != null) { result.setExternalUrl(mlModelProperties.getExternalUrl().toString()); diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index adb24d92587b58..b47be7ae32b2c4 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -956,6 +956,16 @@ type Mutation { Remove Business Attribute """ removeBusinessAttribute(input: AddBusinessAttributeInput!): Boolean + + """ + Link the latest versioned entity to a Version Set + """ + linkAssetVersion(input: LinkVersionInput!): String + + """ + Unlink a versioned entity from a Version Set + """ + unlinkAssetVersion(input: UnlinkVersionInput!): Boolean } """ @@ -10098,7 +10108,7 @@ type MLModelProperties { """ The display name of the model used in the UI """ - name: String! + name: String """ Detailed description of the model's purpose and characteristics @@ -12911,6 +12921,56 @@ input ListBusinessAttributesInput { query: String } +""" +Input for linking a versioned entity to a Version Set +""" +input LinkVersionInput { + """ + The target version set + """ + versionSet: String! + + """ + The target versioned entity to link + """ + linkedEntity: String! + + """ + Version Tag label for the version, should be unique within a Version Set + """ + version: String! 
+ + """ + Optional timestamp from the source system + """ + sourceTimestamp: Long + + """ + Optional creator from the source system, will be converted to an Urn + """ + sourceCreator: String + + """ + Optional comment about the version + """ + comment: String +} + +""" +Input for unlinking a versioned entity from a Version Set +""" +input UnlinkVersionInput { + """ + The target version set + """ + versionSet: String + + """ + The target versioned entity to unlink + """ + unlinkedEntity: String +} + """ The result obtained when listing Business Attribute """ diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java new file mode 100644 index 00000000000000..690856263fccc5 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/LinkAssetVersionResolverTest.java @@ -0,0 +1,102 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.testng.Assert.*; + +import com.google.common.collect.ImmutableList; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.LinkVersionInput; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; +import graphql.schema.DataFetchingEnvironment; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class LinkAssetVersionResolverTest { + + private static final String TEST_VERSION_SET_URN = "urn:li:versionSet:test-version-set"; + private static final String TEST_ENTITY_URN = + "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + + @Test + public void testGetSuccessful() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + IngestResult mockResult = + IngestResult.builder().urn(Urn.createFromString(TEST_ENTITY_URN)).build(); + + Mockito.when( + mockService.linkLatestVersion( + any(), + eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), + eq(UrnUtils.getUrn(TEST_ENTITY_URN)), + any(VersionPropertiesInput.class))) + .thenReturn(ImmutableList.of(mockResult)); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setLinkedEntity(TEST_ENTITY_URN); + input.setComment("Test comment"); + input.setVersion("v1"); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + String result = resolver.get(mockEnv).get(); + assertEquals(result, TEST_ENTITY_URN); + } + + @Test + public void testGetFeatureFlagDisabled() throws Exception { + 
EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(false); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setLinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalAccessError.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetInvalidVersionSetUrn() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + LinkAssetVersionResolver resolver = new LinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + LinkVersionInput input = new LinkVersionInput(); + input.setVersionSet("urn:li:dataset:invalid-version-set"); // Invalid URN type + input.setLinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalArgumentException.class, () -> resolver.get(mockEnv)); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java new file mode 100644 index 00000000000000..0000ad24a04537 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/entity/versioning/UnlinkAssetVersionResolverTest.java @@ -0,0 +1,123 @@ +package com.linkedin.datahub.graphql.resolvers.entity.versioning; + +import static com.linkedin.datahub.graphql.TestUtils.*; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.testng.Assert.*; + +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.UnlinkVersionInput; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import graphql.schema.DataFetchingEnvironment; +import java.util.Collections; +import java.util.concurrent.CompletionException; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class UnlinkAssetVersionResolverTest { + + private static final String TEST_VERSION_SET_URN = "urn:li:versionSet:test-version-set"; + private static final String TEST_ENTITY_URN = + "urn:li:dataset:(urn:li:dataPlatform:mysql,my-test,PROD)"; + + @Test + public void testGetSuccessful() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + Mockito.when( + mockService.unlinkVersion( + any(), + eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), + eq(UrnUtils.getUrn(TEST_ENTITY_URN)))) + .thenReturn(Collections.emptyList()); + + UnlinkAssetVersionResolver resolver = new 
UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertTrue(resolver.get(mockEnv).get()); + + Mockito.verify(mockService) + .unlinkVersion( + any(), eq(UrnUtils.getUrn(TEST_VERSION_SET_URN)), eq(UrnUtils.getUrn(TEST_ENTITY_URN))); + } + + @Test + public void testGetFeatureFlagDisabled() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(false); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalAccessError.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetInvalidVersionSetUrn() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet("urn:li:dataset:invalid-version-set"); // Invalid URN type + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + + assertThrows(IllegalArgumentException.class, () -> resolver.get(mockEnv)); + } + + @Test + public void testGetServiceException() throws Exception { + EntityVersioningService mockService = Mockito.mock(EntityVersioningService.class); + FeatureFlags mockFlags = Mockito.mock(FeatureFlags.class); + + Mockito.when(mockFlags.isEntityVersioning()).thenReturn(true); + + Mockito.doThrow(new RuntimeException("Service error")) + .when(mockService) + .unlinkVersion(any(), any(), any()); + + UnlinkAssetVersionResolver resolver = new UnlinkAssetVersionResolver(mockService, mockFlags); + + // Execute resolver + QueryContext mockContext = getMockAllowContext(); + DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); + UnlinkVersionInput input = new UnlinkVersionInput(); + input.setVersionSet(TEST_VERSION_SET_URN); + input.setUnlinkedEntity(TEST_ENTITY_URN); + + Mockito.when(mockEnv.getArgument(Mockito.eq("input"))).thenReturn(input); + Mockito.when(mockEnv.getContext()).thenReturn(mockContext); + + assertThrows(CompletionException.class, () -> resolver.get(mockEnv).join()); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java new file mode 100644 index 00000000000000..4e0dbd7b1733b4 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java @@ -0,0 +1,46 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import static org.testng.Assert.*; + +import com.linkedin.common.TimeStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.AuditStamp; +import org.testng.annotations.Test; + +public class TimeStampToAuditStampMapperTest { + + private static final String TEST_ACTOR_URN = "urn:li:corpuser:testUser"; + private static final long TEST_TIME = 1234567890L; + + @Test + public void testMapWithActor() throws Exception { + TimeStamp input = new TimeStamp(); + input.setTime(TEST_TIME); + input.setActor(Urn.createFromString(TEST_ACTOR_URN)); + + AuditStamp result = TimeStampToAuditStampMapper.map(null, input); + + assertNotNull(result); + assertEquals(result.getTime().longValue(), TEST_TIME); + assertEquals(result.getActor(), TEST_ACTOR_URN); + } + + @Test + public void testMapWithoutActor() { + TimeStamp input = new TimeStamp(); + input.setTime(TEST_TIME); + + AuditStamp result = TimeStampToAuditStampMapper.map(null, input); + + assertNotNull(result); + assertEquals(result.getTime().longValue(), TEST_TIME); + assertNull(result.getActor()); + } + + @Test + public void testMapNull() { + AuditStamp result = TimeStampToAuditStampMapper.map(null, null); + + assertNull(result); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java new file mode 100644 index 00000000000000..479d7340fef945 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java @@ -0,0 +1,75 @@ +package com.linkedin.datahub.graphql.types.dataplatforminstance.mapper; + +import static org.testng.Assert.*; + +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.DataPlatformInstance; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; +import org.testng.annotations.Test; + +public class DataPlatformInstanceAspectMapperTest { + + private static final String TEST_PLATFORM = "hive"; + private static final String TEST_INSTANCE = "prod"; + private static final String TEST_PLATFORM_URN = "urn:li:dataPlatform:" + TEST_PLATFORM; + private static final String TEST_INSTANCE_URN = + String.format( + "urn:li:dataPlatformInstance:(urn:li:dataPlatform:%s,%s)", TEST_PLATFORM, TEST_INSTANCE); + + @Test + public void testMapWithInstance() throws Exception { + // Create test input + com.linkedin.common.DataPlatformInstance input = new com.linkedin.common.DataPlatformInstance(); + DataPlatformUrn platformUrn = new DataPlatformUrn(TEST_PLATFORM); + Urn instanceUrn = Urn.createFromString(TEST_INSTANCE_URN); + + input.setPlatform(platformUrn); + input.setInstance(instanceUrn); + + // Map and verify + DataPlatformInstance result = DataPlatformInstanceAspectMapper.map(null, input); + + assertNotNull(result); + 
assertEquals(result.getType(), EntityType.DATA_PLATFORM_INSTANCE); + assertEquals(result.getUrn(), TEST_INSTANCE_URN); + + // Verify platform mapping + assertNotNull(result.getPlatform()); + assertEquals(result.getPlatform().getType(), EntityType.DATA_PLATFORM); + assertEquals(result.getPlatform().getUrn(), TEST_PLATFORM_URN); + } + + @Test + public void testMapWithoutInstance() throws Exception { + // Create test input with only platform + com.linkedin.common.DataPlatformInstance input = new com.linkedin.common.DataPlatformInstance(); + DataPlatformUrn platformUrn = new DataPlatformUrn(TEST_PLATFORM); + input.setPlatform(platformUrn); + + // Map and verify + DataPlatformInstance result = DataPlatformInstanceAspectMapper.map(null, input); + + assertNotNull(result); + assertNull(result.getType()); // Type should be null when no instance + assertNull(result.getUrn()); // URN should be null when no instance + + // Verify platform is still mapped correctly + assertNotNull(result.getPlatform()); + assertEquals(result.getPlatform().getType(), EntityType.DATA_PLATFORM); + assertEquals(result.getPlatform().getUrn(), TEST_PLATFORM_URN); + } + + @Test(expectedExceptions = NullPointerException.class) + public void testMapNull() { + DataPlatformInstanceAspectMapper.map(null, null); + } + + @Test + public void testSingleton() { + assertNotNull(DataPlatformInstanceAspectMapper.INSTANCE); + assertSame( + DataPlatformInstanceAspectMapper.INSTANCE, DataPlatformInstanceAspectMapper.INSTANCE); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java new file mode 100644 index 00000000000000..437c74ab669146 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java @@ -0,0 +1,246 @@ +package com.linkedin.datahub.graphql.types.dataprocessinst; + +import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static org.mockito.ArgumentMatchers.any; +import static org.testng.Assert.*; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.FabricType; +import com.linkedin.common.Status; +import com.linkedin.common.SubTypes; +import com.linkedin.common.UrnArray; +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.DatasetUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.container.Container; +import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.dataprocess.DataProcessInstanceInput; +import com.linkedin.dataprocess.DataProcessInstanceOutput; +import com.linkedin.dataprocess.DataProcessInstanceProperties; +import com.linkedin.dataprocess.DataProcessInstanceRelationships; +import com.linkedin.dataprocess.DataProcessInstanceRunEvent; +import com.linkedin.dataprocess.DataProcessRunStatus; +import com.linkedin.dataprocess.DataProcessType; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; 
+import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.key.DataProcessInstanceKey; +import com.linkedin.ml.metadata.MLTrainingRunProperties; +import com.linkedin.r2.RemoteInvocationException; +import com.linkedin.test.TestResult; +import com.linkedin.test.TestResultArray; +import com.linkedin.test.TestResultType; +import com.linkedin.test.TestResults; +import graphql.execution.DataFetcherResult; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class DataProcessInstanceTypeTest { + + private static final String TEST_INSTANCE_URN = + "urn:li:dataProcessInstance:(test-workflow,test-instance-1)"; + private static final String TEST_DPI_1_URN = "urn:li:dataProcessInstance:id-1"; + private static final DatasetUrn DATASET_URN = + new DatasetUrn(new DataPlatformUrn("kafka"), "dataset1", FabricType.TEST); + private static final Urn DPI_URN_REL = UrnUtils.getUrn("urn:li:dataProcessInstance:id-2"); + private static final DataProcessInstanceKey TEST_DPI_1_KEY = + new DataProcessInstanceKey().setId("id-1"); + private static final DataProcessInstanceProperties TEST_DPI_1_PROPERTIES = + new DataProcessInstanceProperties().setName("Test DPI").setType(DataProcessType.STREAMING); + private static final DataProcessInstanceInput TEST_DPI_1_DPI_INPUT = + new DataProcessInstanceInput().setInputs(new UrnArray(ImmutableList.of(DATASET_URN))); + private static final DataProcessInstanceOutput TEST_DPI_1_DPI_OUTPUT = + new DataProcessInstanceOutput().setOutputs(new UrnArray(ImmutableList.of(DATASET_URN))); + private static final DataProcessInstanceRelationships TEST_DPI_1_DPI_RELATIONSHIPS = + new DataProcessInstanceRelationships() + .setParentInstance(DPI_URN_REL) + .setUpstreamInstances(new UrnArray(ImmutableList.of(DPI_URN_REL))) + .setParentTemplate(DPI_URN_REL); + private static final DataProcessInstanceRunEvent TEST_DPI_1_DPI_RUN_EVENT = + new DataProcessInstanceRunEvent().setStatus(DataProcessRunStatus.COMPLETE); + private static final DataPlatformInstance TEST_DPI_1_DATA_PLATFORM_INSTANCE = + new DataPlatformInstance().setPlatform(new DataPlatformUrn("kafka")); + private static final Status TEST_DPI_1_STATUS = new Status().setRemoved(false); + private static final TestResults TEST_DPI_1_TEST_RESULTS = + new TestResults() + .setPassing( + new TestResultArray( + ImmutableList.of( + new TestResult() + .setTest(UrnUtils.getUrn("urn:li:test:123")) + .setType(TestResultType.SUCCESS)))) + .setFailing(new TestResultArray()); + private static final SubTypes TEST_DPI_1_SUB_TYPES = + new SubTypes().setTypeNames(new StringArray("subtype1")); + private static final Container TEST_DPI_1_CONTAINER = + new Container().setContainer(UrnUtils.getUrn("urn:li:container:123")); + private static final MLTrainingRunProperties ML_TRAINING_RUN_PROPERTIES = + new MLTrainingRunProperties().setId("mytrainingrun"); + + private static final String TEST_DPI_2_URN = "urn:li:dataProcessInstance:id-2"; + + @Test + public void testBatchLoadFull() throws Exception { + EntityClient client = Mockito.mock(EntityClient.class); + + Urn dpiUrn1 = Urn.createFromString(TEST_DPI_1_URN); + Urn dpiUrn2 = Urn.createFromString(TEST_DPI_2_URN); + + Map aspectMap = new HashMap<>(); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_KEY_ASPECT_NAME, + new 
EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_KEY.data()))); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_PROPERTIES.data()))); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_INPUT_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_INPUT.data()))); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_OUTPUT_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_OUTPUT.data()))); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_RELATIONSHIPS.data()))); + aspectMap.put( + Constants.DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_RUN_EVENT.data()))); + aspectMap.put( + Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DATA_PLATFORM_INSTANCE.data()))); + aspectMap.put( + Constants.STATUS_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_STATUS.data()))); + aspectMap.put( + Constants.TEST_RESULTS_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_TEST_RESULTS.data()))); + aspectMap.put( + Constants.SUB_TYPES_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_SUB_TYPES.data()))); + aspectMap.put( + Constants.CONTAINER_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_CONTAINER.data()))); + aspectMap.put( + Constants.ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME, + new EnvelopedAspect().setValue(new Aspect(ML_TRAINING_RUN_PROPERTIES.data()))); + + Mockito.when( + client.batchGetV2( + any(), + Mockito.eq(Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME), + Mockito.eq(new HashSet<>(ImmutableSet.of(dpiUrn1, dpiUrn2))), + Mockito.eq(DataProcessInstanceType.ASPECTS_TO_FETCH))) + .thenReturn( + ImmutableMap.of( + dpiUrn1, + new EntityResponse() + .setEntityName(Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME) + .setUrn(dpiUrn1) + .setAspects(new EnvelopedAspectMap(aspectMap)))); + + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); + + DataProcessInstanceType type = new DataProcessInstanceType(client, mockFeatureFlags); + + QueryContext mockContext = getMockAllowContext(); + List> result = + type.batchLoad(ImmutableList.of(TEST_DPI_1_URN, TEST_DPI_2_URN), mockContext); + + // Verify response + Mockito.verify(client, Mockito.times(1)) + .batchGetV2( + any(), + Mockito.eq(Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME), + Mockito.eq(ImmutableSet.of(dpiUrn1, dpiUrn2)), + Mockito.eq(DataProcessInstanceType.ASPECTS_TO_FETCH)); + + assertEquals(result.size(), 2); + + DataProcessInstance dpi1 = result.get(0).getData(); + assertEquals(dpi1.getUrn(), TEST_DPI_1_URN); + assertEquals(dpi1.getName(), "Test DPI"); + assertEquals(dpi1.getType(), EntityType.DATA_PROCESS_INSTANCE); + + // Assert second element is null + assertNull(result.get(1)); + } + + @Test + public void testBatchLoad() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); + + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + List> result = + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + + assertEquals(result.size(), 1); + } 
+ + @Test + public void testBatchLoadFeatureFlagDisabled() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(false); + + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + List<DataFetcherResult<DataProcessInstance>> result = + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + + assertEquals(result.size(), 0); + + Mockito.verify(mockClient, Mockito.never()) + .batchGetV2(any(), Mockito.anyString(), Mockito.anySet(), Mockito.anySet()); + } + + @Test(expectedExceptions = RuntimeException.class) + public void testBatchLoadClientException() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); + + Mockito.doThrow(RemoteInvocationException.class) + .when(mockClient) + .batchGetV2(any(), Mockito.anyString(), Mockito.anySet(), Mockito.anySet()); + + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + } + + @Test + public void testGetType() { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + assertEquals(type.type(), EntityType.DATA_PROCESS_INSTANCE); + } + + @Test + public void testObjectClass() { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + assertEquals(type.objectClass(), DataProcessInstance.class); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java new file mode 100644 index 00000000000000..dc1ce935ad5ecd --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java @@ -0,0 +1,127 @@ +package com.linkedin.datahub.graphql.types.dataprocessinst.mappers; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.url.Url; +import com.linkedin.common.urn.Urn; +import com.linkedin.container.Container; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringArray; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.dataprocess.DataProcessInstanceProperties; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.metadata.Constants; +import com.linkedin.ml.metadata.MLTrainingRunProperties; +import java.util.HashMap; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +public class DataProcessInstanceMapperTest { + + private 
static final String TEST_PLATFORM_URN = "urn:li:dataPlatform:kafka"; + private static final String TEST_INSTANCE_URN = + "urn:li:dataProcessInstance:(test-workflow,test-instance)"; + private static final String TEST_CONTAINER_URN = "urn:li:container:testContainer"; + private static final String TEST_EXTERNAL_URL = "https://example.com/process"; + private static final String TEST_NAME = "Test Process Instance"; + + private EntityResponse entityResponse; + private Urn urn; + + @BeforeMethod + public void setup() throws Exception { + urn = Urn.createFromString(TEST_INSTANCE_URN); + entityResponse = new EntityResponse(); + entityResponse.setUrn(urn); + entityResponse.setAspects(new EnvelopedAspectMap(new HashMap<>())); + } + + @Test + public void testMapBasicFields() throws Exception { + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance); + assertEquals(instance.getUrn(), urn.toString()); + assertEquals(instance.getType(), EntityType.DATA_PROCESS_INSTANCE); + } + + @Test + public void testMapDataProcessProperties() throws Exception { + // Create DataProcessInstanceProperties + DataProcessInstanceProperties properties = new DataProcessInstanceProperties(); + properties.setName(TEST_NAME); + properties.setExternalUrl(new Url(TEST_EXTERNAL_URL)); + + // Add properties aspect + addAspect(Constants.DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, properties); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance.getProperties()); + assertEquals(instance.getName(), TEST_NAME); + assertEquals(instance.getExternalUrl(), TEST_EXTERNAL_URL); + } + + @Test + public void testMapPlatformInstance() throws Exception { + // Create DataPlatformInstance + DataPlatformInstance platformInstance = new DataPlatformInstance(); + platformInstance.setPlatform(Urn.createFromString(TEST_PLATFORM_URN)); + + // Add platform instance aspect + addAspect(Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME, platformInstance); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance.getDataPlatformInstance()); + assertNotNull(instance.getPlatform()); + assertEquals(instance.getPlatform().getUrn(), TEST_PLATFORM_URN); + assertEquals(instance.getPlatform().getType(), EntityType.DATA_PLATFORM); + } + + @Test + public void testMapContainer() throws Exception { + // Create Container aspect + Container container = new Container(); + container.setContainer(Urn.createFromString(TEST_CONTAINER_URN)); + + // Add container aspect + addAspect(Constants.CONTAINER_ASPECT_NAME, container); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance.getContainer()); + assertEquals(instance.getContainer().getUrn(), TEST_CONTAINER_URN); + assertEquals(instance.getContainer().getType(), EntityType.CONTAINER); + } + + @Test + public void testMapMLTrainingProperties() throws Exception { + // Create MLTrainingRunProperties + MLTrainingRunProperties trainingProperties = new MLTrainingRunProperties(); + trainingProperties.setId("test-run-id"); + trainingProperties.setOutputUrls(new StringArray("s3://test-bucket/model")); + + // Add ML training properties aspect + addAspect(Constants.ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME, trainingProperties); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance); + assertEquals(instance.getMlTrainingRunProperties().getId(), 
"test-run-id"); + assertEquals( + instance.getMlTrainingRunProperties().getOutputUrls().get(0), "s3://test-bucket/model"); + } + + private void addAspect(String aspectName, RecordTemplate aspect) { + EnvelopedAspect envelopedAspect = new EnvelopedAspect(); + envelopedAspect.setValue(new Aspect(aspect.data())); + entityResponse.getAspects().put(aspectName, envelopedAspect); + } +} diff --git a/datahub-web-react/src/app/buildEntityRegistry.ts b/datahub-web-react/src/app/buildEntityRegistry.ts index 181ec7d328a587..b7ff97b3a07469 100644 --- a/datahub-web-react/src/app/buildEntityRegistry.ts +++ b/datahub-web-react/src/app/buildEntityRegistry.ts @@ -25,6 +25,7 @@ import { RestrictedEntity } from './entity/restricted/RestrictedEntity'; import { BusinessAttributeEntity } from './entity/businessAttribute/BusinessAttributeEntity'; import { SchemaFieldPropertiesEntity } from './entity/schemaField/SchemaFieldPropertiesEntity'; import { StructuredPropertyEntity } from './entity/structuredProperty/StructuredPropertyEntity'; +import { DataProcessInstanceEntity } from './entity/dataProcessInstance/DataProcessInstanceEntity'; export default function buildEntityRegistry() { const registry = new EntityRegistry(); @@ -54,5 +55,6 @@ export default function buildEntityRegistry() { registry.register(new BusinessAttributeEntity()); registry.register(new SchemaFieldPropertiesEntity()); registry.register(new StructuredPropertyEntity()); + registry.register(new DataProcessInstanceEntity()); return registry; } diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx new file mode 100644 index 00000000000000..4834a026ad94a3 --- /dev/null +++ b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx @@ -0,0 +1,264 @@ +import React from 'react'; +import { ApiOutlined } from '@ant-design/icons'; +import { + DataProcessInstance, + Entity as GeneratedEntity, + EntityType, + OwnershipType, + SearchResult, +} from '../../../types.generated'; +import { Preview } from './preview/Preview'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { useGetDataProcessInstanceQuery } from '../../../graphql/dataProcessInstance.generated'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { LineageTab } from '../shared/tabs/Lineage/LineageTab'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import { GenericEntityProperties } from '../shared/types'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityDropdown'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { getDataProduct } from '../shared/utils'; +// import SummaryTab from './profile/DataProcessInstaceSummary'; + +// const getProcessPlatformName = (data?: DataProcessInstance): string => { +// 
return ( +// data?.dataPlatformInstance?.platform?.properties?.displayName || +// capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name) || +// '' +// ); +// }; + +const getParentEntities = (data: DataProcessInstance): GeneratedEntity[] => { + const parentEntity = data?.relationships?.relationships?.find( + (rel) => rel.type === 'InstanceOf' && rel.entity?.type === EntityType.DataJob, + ); + + if (!parentEntity?.entity) return []; + + // Convert to GeneratedEntity + return [ + { + type: parentEntity.entity.type, + urn: (parentEntity.entity as any).urn, // Make sure urn exists + relationships: (parentEntity.entity as any).relationships, + }, + ]; +}; +/** + * Definition of the DataHub DataProcessInstance entity. + */ +export class DataProcessInstanceEntity implements Entity { + type: EntityType = EntityType.DataProcessInstance; + + icon = (fontSize: number, styleType: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ; + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getPathName = () => 'dataProcessInstance'; + + getEntityName = () => 'Process Instance'; + + getGraphName = () => 'dataProcessInstance'; + + getCollectionName = () => 'Process Instances'; + + useEntityQuery = useGetDataProcessInstanceQuery; + + renderProfile = (urn: string) => ( + { + // const activeIncidentCount = processInstance?.dataProcessInstance?.activeIncidents.total; + // return `Incidents${(activeIncidentCount && ` (${activeIncidentCount})`) || ''}`; + // }, + // }, + ]} + sidebarSections={this.getSidebarSections()} + /> + ); + + getSidebarSections = () => [ + { + component: SidebarAboutSection, + }, + { + component: SidebarOwnerSection, + properties: { + defaultOwnerType: OwnershipType.TechnicalOwner, + }, + }, + { + component: SidebarTagsSection, + properties: { + hasTags: true, + hasTerms: true, + }, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + ]; + + getOverridePropertiesFromEntity = (processInstance?: DataProcessInstance | null): GenericEntityProperties => { + const name = processInstance?.name; + const externalUrl = processInstance?.externalUrl; + return { + name, + externalUrl, + }; + }; + + renderPreview = (_: PreviewType, data: DataProcessInstance) => { + const genericProperties = this.getGenericEntityProperties(data); + const parentEntities = getParentEntities(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as DataProcessInstance; + const genericProperties = this.getGenericEntityProperties(data); + const parentEntities = getParentEntities(data); + return ( + + ); + }; + + getLineageVizConfig = (entity: DataProcessInstance) => { + return { + urn: entity?.urn, + name: this.displayName(entity), + type: EntityType.DataProcessInstance, + subtype: entity?.subTypes?.typeNames?.[0], + icon: entity?.platform?.properties?.logoUrl || undefined, + platform: entity?.platform, + container: entity?.container, + // health: entity?.health || undefined, + }; + }; + + displayName = (data: DataProcessInstance) => { + return data.properties?.name || data.urn; + }; + + getGenericEntityProperties = (data: DataProcessInstance) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + 
supportedCapabilities = () => { + return new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + ]); + }; +} diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx new file mode 100644 index 00000000000000..3a3b0340695d96 --- /dev/null +++ b/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx @@ -0,0 +1,103 @@ +import React from 'react'; +import { + DataProduct, + Deprecation, + Domain, + Entity as GeneratedEntity, + EntityPath, + EntityType, + GlobalTags, + Health, + Owner, + SearchInsight, + Container, + ParentContainersResult, +} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../preview/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType } from '../../Entity'; + +export const Preview = ({ + urn, + name, + subType, + description, + platformName, + platformLogo, + platformInstanceId, + container, + owners, + domain, + dataProduct, + deprecation, + globalTags, + snippet, + insights, + externalUrl, + degree, + paths, + health, + parentEntities, + parentContainers, +}: // duration, +// status, +// startTime, +{ + urn: string; + name: string; + subType?: string | null; + description?: string | null; + platformName?: string; + platformLogo?: string | null; + platformInstanceId?: string; + container?: Container; + owners?: Array<Owner> | null; + domain?: Domain | null; + dataProduct?: DataProduct | null; + deprecation?: Deprecation | null; + globalTags?: GlobalTags | null; + snippet?: React.ReactNode | null; + insights?: Array<SearchInsight> | null; + externalUrl?: string | null; + degree?: number; + paths?: EntityPath[]; + health?: Health[] | null; + parentEntities?: Array<GeneratedEntity> | null; + parentContainers?: ParentContainersResult | null; + // duration?: number | null; + // status?: string | null; + // startTime?: number | null; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + + ); +}; diff --git a/datahub-web-react/src/graphql/dataProcessInstance.graphql b/datahub-web-react/src/graphql/dataProcessInstance.graphql new file mode 100644 index 00000000000000..8f55ca4903d527 --- /dev/null +++ b/datahub-web-react/src/graphql/dataProcessInstance.graphql @@ -0,0 +1,181 @@ +fragment processInstanceRelationshipResults on EntityRelationshipsResult { + start + count + total + relationships { + type + direction + entity { + urn + type + ... on Dataset { + name + properties { + name + description + qualifiedName + } + editableProperties { + description + } + platform { + ...platformFields + } + subTypes { + typeNames + } + status { + removed + } + } + ... 
on DataJob { + urn + type + dataFlow { + ...nonRecursiveDataFlowFields + } + jobId + properties { + name + description + externalUrl + customProperties { + key + value + } + } + deprecation { + ...deprecationFields + } + dataPlatformInstance { + ...dataPlatformInstanceFields + } + subTypes { + typeNames + } + editableProperties { + description + } + status { + removed + } + } + } + } +} + +fragment dataProcessInstanceFields on DataProcessInstance { + urn + type + platform { + ...platformFields + } + parentContainers { + ...parentContainersFields + } + container { + ...entityContainer + } + subTypes { + typeNames + } + properties { + name + createdTS: created { + time + actor + } + customProperties { + key + value + } + } + mlTrainingRunProperties { + outputUrls + trainingMetrics { + name + description + value + } + hyperParams { + name + description + value + } + } + dataPlatformInstance { + ...dataPlatformInstanceFields + } + state(startTimeMillis: null, endTimeMillis: null, limit: 1) { + status + attempt + result { + resultType + nativeResultType + } + timestampMillis + durationMillis + } + relationships(input: { types: ["InstanceOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 50 }) { + ...processInstanceRelationshipResults + } +} + +query getDataProcessInstance($urn: String!) { + dataProcessInstance(urn: $urn) { + urn + type + platform { + ...platformFields + } + parentContainers { + ...parentContainersFields + } + subTypes { + typeNames + } + container { + ...entityContainer + } + name + properties { + name + created { + time + actor + } + } + mlTrainingRunProperties { + id + outputUrls + trainingMetrics { + name + description + value + } + hyperParams { + name + description + value + } + } + relationships( + input: { types: ["InstanceOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 50 } + ) { + ...processInstanceRelationshipResults + } + dataPlatformInstance { + ...dataPlatformInstanceFields + } + state(startTimeMillis: null, endTimeMillis: null, limit: 1) { + status + attempt + result { + resultType + nativeResultType + } + timestampMillis + durationMillis + } + } +} diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 68c57c5cb5db55..ecac2997489354 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -863,8 +863,17 @@ fragment nonRecursiveMLModel on MLModel { ...ownershipFields } properties { + name description date + created { + time + actor + } + lastModified { + time + actor + } externalUrl version type @@ -956,7 +965,12 @@ fragment nonRecursiveMLModelGroupFields on MLModelGroup { ...deprecationFields } properties { + name description + created { + time + actor + } } browsePathV2 { ...browsePathV2Fields diff --git a/datahub-web-react/src/graphql/lineage.graphql b/datahub-web-react/src/graphql/lineage.graphql index ee05811cbb72de..457936ed62cd2e 100644 --- a/datahub-web-react/src/graphql/lineage.graphql +++ b/datahub-web-react/src/graphql/lineage.graphql @@ -259,6 +259,9 @@ fragment lineageNodeProperties on EntityWithRelationships { name description origin + tags { + ...globalTagsFields + } platform { ...platformFields } @@ -268,6 +271,19 @@ fragment lineageNodeProperties on EntityWithRelationships { status { removed } + properties { + createdTS: created { + time + actor + } + customProperties { + key + value + } + } + editableProperties { + description + } structuredProperties { properties { 
...structuredPropertiesFields @@ -328,6 +344,9 @@ fragment lineageNodeProperties on EntityWithRelationships { urn type } + ... on DataProcessInstance { + ...dataProcessInstanceFields + } } fragment lineageFields on EntityWithRelationships { diff --git a/datahub-web-react/src/graphql/mlModelGroup.graphql b/datahub-web-react/src/graphql/mlModelGroup.graphql index 81ab65d0b9a08d..4f11ed4984d37a 100644 --- a/datahub-web-react/src/graphql/mlModelGroup.graphql +++ b/datahub-web-react/src/graphql/mlModelGroup.graphql @@ -2,6 +2,18 @@ query getMLModelGroup($urn: String!) { mlModelGroup(urn: $urn) { urn type + properties { + name + description + created { + time + actor + } + lastModified { + time + actor + } + } ...nonRecursiveMLModelGroupFields incoming: relationships( input: { diff --git a/docker/build.gradle b/docker/build.gradle index 7b36c0d9acdcf0..0070d814286cf0 100644 --- a/docker/build.gradle +++ b/docker/build.gradle @@ -42,6 +42,13 @@ ext { modules: python_services_modules + backend_profile_modules + [':datahub-frontend'], isDebug: true ], + 'quickstartDebugConsumers': [ + profile: 'debug-consumers', + modules: python_services_modules + backend_profile_modules + [':datahub-frontend', + ':metadata-jobs:mce-consumer-job', + ':metadata-jobs:mae-consumer-job'], + isDebug: true + ], 'quickstartPg': [ profile: 'quickstart-postgres', modules: (backend_profile_modules - [':docker:mysql-setup']) + [ @@ -99,9 +106,7 @@ dockerCompose { } // Common environment variables - environment.put 'DATAHUB_VERSION', config.isDebug ? - System.getenv("DATAHUB_VERSION") ?: "v${version}" : - "v${version}" + environment.put 'DATAHUB_VERSION', System.getenv("DATAHUB_VERSION") ?: "v${version}" environment.put 'DATAHUB_TELEMETRY_ENABLED', 'false' environment.put "METADATA_TESTS_ENABLED", "true" environment.put "DATAHUB_REPO", "${docker_registry}" diff --git a/docker/profiles/README.md b/docker/profiles/README.md index fb3c9e3c84a7a2..192fde3130a895 100644 --- a/docker/profiles/README.md +++ b/docker/profiles/README.md @@ -101,4 +101,30 @@ Runs everything except for the GMS. Useful for running just a local (non-docker) | debug-cassandra | | | X | | X | X | X | X | | | X | X | | | debug-consumers | X | | | | X | X | X | X | X | X | X | X | | | debug-neo4j | X | | | X | X | X | X | X | | | X | X | | -| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X | \ No newline at end of file +| debug-elasticsearch | X | | | | X | X | X | X | | | X | | X | + +## Advanced Setups + +### Version Mixing + +In some cases, it might be useful to debug upgrade scenarios where there are intentional version mismatches. It is possible +to override individual component versions. + +Note: This only works for `non-debug` profiles because of the file mounts used in `debug`, which would run older containers +but still pick up the latest application jars. + +In this example we are interested in upgrading two components (the `mae-consumer` and the `mce-consumer`) to a fresh build `v0.15.1-SNAPSHOT` +while maintaining older components on `v0.14.1` (especially the `system-update` container). + +This configuration reproduces the situation where the consumers were upgraded prior to running the latest version of `system-update`. In this +scenario we expect the consumers to block their startup, waiting for the successful completion of a newer `system-update`. 
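Under the hood, the compose files resolve each image tag with nested shell-style defaults, e.g. `${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-head}}`: a per-component override wins, then the global `DATAHUB_VERSION`, then the profile default (`head`, or `debug` for debug profiles). A sketch of that precedence in Java; the `resolveTag` helper is purely illustrative, not project code (note compose's `:-` treats unset and empty the same, hence the `isEmpty` checks). The variables used in the example are listed next.

```java
// Illustration of the nested-default lookup performed by the compose files,
// e.g. ${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-head}}: component override
// first, then the global DATAHUB_VERSION, then the profile default.
public final class VersionResolution {
  static String resolveTag(String componentVar, String globalVar, String profileDefault) {
    String component = System.getenv(componentVar); // e.g. DATAHUB_MCE_VERSION
    if (component != null && !component.isEmpty()) {
      return component;
    }
    String global = System.getenv(globalVar); // DATAHUB_VERSION
    return (global != null && !global.isEmpty()) ? global : profileDefault;
  }

  public static void main(String[] args) {
    // With the example command above, this resolves to v0.15.1-SNAPSHOT,
    // while components without an override fall back to v0.14.1.
    System.out.println(resolveTag("DATAHUB_MCE_VERSION", "DATAHUB_VERSION", "head"));
  }
}
```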
+ +`DATAHUB_VERSION` - specifies the default component version of `v0.14.1` +`DATAHUB_MAE_VERSION` - specifies an override of just the `mae-consumer` to version `v0.15.1-SNAPSHOT`[1] +`DATAHUB_MCE_VERSION` - specifies an override of just the `mce-consumer` to version `v0.15.1-SNAPSHOT`[1] + +```shell + DATAHUB_MAE_VERSION="v0.15.1-SNAPSHOT" DATAHUB_MCE_VERSION="v0.15.1-SNAPSHOT" DATAHUB_VERSION="v0.14.1" ./gradlew quickstart +``` + +[1] Image versions were `v0.15.1-SNAPSHOT` built locally prior to running the command. diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 64163ef970080a..2147d6b5a0247f 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -54,7 +54,7 @@ x-datahub-dev-telemetry-env: &datahub-dev-telemetry-env ################################# x-datahub-system-update-service: &datahub-system-update-service hostname: datahub-system-update - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_UPDATE_VERSION:-${DATAHUB_VERSION:-head}} command: - -u - SystemUpdate @@ -67,12 +67,13 @@ x-datahub-system-update-service: &datahub-system-update-service SCHEMA_REGISTRY_SYSTEM_UPDATE: ${SCHEMA_REGISTRY_SYSTEM_UPDATE:-true} SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS: ${SPRING_KAFKA_PROPERTIES_AUTO_REGISTER_SCHEMAS:-true} SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION: ${SPRING_KAFKA_PROPERTIES_USE_LATEST_VERSION:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins x-datahub-system-update-service-dev: &datahub-system-update-service-dev <<: *datahub-system-update-service - image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_UPGRADE_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-upgrade}:${DATAHUB_UPDATE_VERSION:-${DATAHUB_VERSION:-debug}} ports: - ${DATAHUB_MAPPED_UPGRADE_DEBUG_PORT:-5003}:5003 environment: &datahub-system-update-dev-env @@ -80,6 +81,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev SKIP_ELASTICSEARCH_CHECK: false REPROCESS_DEFAULT_BROWSE_PATHS_V2: ${REPROCESS_DEFAULT_BROWSE_PATHS_V2:-false} JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5003' + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ../../datahub-upgrade/build/libs/:/datahub/datahub-upgrade/bin/ - ../../metadata-models/src/main/resources/:/datahub/datahub-gms/resources @@ -90,7 +92,7 @@ x-datahub-system-update-service-dev: &datahub-system-update-service-dev ################################# x-datahub-gms-service: &datahub-gms-service hostname: datahub-gms - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_GMS_VERSION:-${DATAHUB_VERSION:-head}} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 env_file: @@ -101,6 +103,7 @@ x-datahub-gms-service: &datahub-gms-service <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] ALTERNATE_MCP_VALIDATION: ${ALTERNATE_MCP_VALIDATION:-true} STRICT_URN_VALIDATION_ENABLED: ${STRICT_URN_VALIDATION_ENABLED:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} healthcheck: test: curl -sS --fail 
http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s @@ -115,7 +118,7 @@ x-datahub-gms-service: &datahub-gms-service x-datahub-gms-service-dev: &datahub-gms-service-dev <<: *datahub-gms-service - image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_GMS_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-gms}:${DATAHUB_GMS_VERSION:-${DATAHUB_VERSION:-debug}} ports: - ${DATAHUB_MAPPED_GMS_DEBUG_PORT:-5001}:5001 - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 @@ -131,6 +134,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev SEARCH_SERVICE_ENABLE_CACHE: false LINEAGE_SEARCH_CACHE_ENABLED: false SHOW_BROWSE_V2: true + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh - ./datahub-gms/jetty.xml:/datahub/datahub-gms/scripts/jetty.xml @@ -146,7 +150,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev ################################# x-datahub-mae-consumer-service: &datahub-mae-consumer-service hostname: datahub-mae-consumer - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_MAE_VERSION:-${DATAHUB_VERSION:-head}} ports: - 9091:9091 env_file: @@ -155,12 +159,14 @@ x-datahub-mae-consumer-service: &datahub-mae-consumer-service - ${DATAHUB_LOCAL_MAE_ENV:-empty2.env} environment: &datahub-mae-consumer-env <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *kafka-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev <<: *datahub-mae-consumer-service - image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_VERSION:-debug} + image: ${DATAHUB_MAE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mae-consumer}:${DATAHUB_MAE_VERSION:-${DATAHUB_VERSION:-debug}} environment: <<: [*datahub-dev-telemetry-env, *datahub-mae-consumer-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-mae-consumer/start.sh:/datahub/datahub-mae-consumer/scripts/start.sh - ../../metadata-models/src/main/resources/:/datahub/datahub-mae-consumer/resources @@ -172,7 +178,7 @@ x-datahub-mae-consumer-service-dev: &datahub-mae-consumer-service-dev ################################# x-datahub-mce-consumer-service: &datahub-mce-consumer-service hostname: datahub-mce-consumer - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-head} + image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-head}} ports: - 9090:9090 env_file: @@ -183,12 +189,14 @@ x-datahub-mce-consumer-service: &datahub-mce-consumer-service <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] ALTERNATE_MCP_VALIDATION: ${ALTERNATE_MCP_VALIDATION:-true} STRICT_URN_VALIDATION_ENABLED: ${STRICT_URN_VALIDATION_ENABLED:-true} + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} x-datahub-mce-consumer-service-dev: &datahub-mce-consumer-service-dev <<: *datahub-mce-consumer-service - image: ${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_VERSION:-debug} + image: 
${DATAHUB_MCE_CONSUMER_IMAGE:-${DATAHUB_REPO:-acryldata}/datahub-mce-consumer}:${DATAHUB_MCE_VERSION:-${DATAHUB_VERSION:-debug}} environment: <<: [*datahub-dev-telemetry-env, *datahub-mce-consumer-env] + ENTITY_VERSIONING_ENABLED: ${ENTITY_VERSIONING_ENABLED:-true} volumes: - ./datahub-mce-consumer/start.sh:/datahub/datahub-mce-consumer/scripts/start.sh - ../../metadata-jobs/mce-consumer-job/build/libs/:/datahub/datahub-mce-consumer/bin diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 31644f459ed731..350521ea8ee643 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -13,6 +13,15 @@ module.exports = { projectName: "datahub", // Usually your repo name. staticDirectories: ["static", "genStatic"], stylesheets: ["https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;700&display=swap"], + headTags: [ + { + tagName: 'meta', + attributes: { + httpEquiv: 'Content-Security-Policy', + content: "frame-ancestors 'self' https://*.acryl.io https://acryldata.io http://localhost:*" + } + }, + ], scripts: [ { src: "https://tools.luckyorange.com/core/lo.js?site-id=28ea8a38", diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 68b41c907c6ad6..eb5a792216d981 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -1,3 +1,8 @@ +# Known Issues + +- Async APIs - DataHub's asynchronous APIs perform only basic schema validation when receiving MCP requests, similar to producing directly to the MCP Kafka topics. While requests must conform to the MCP schema to be accepted, actual processing happens later in the pipeline. Any processing failures that occur after the initial acceptance are captured in the Failed MCP topic, but these failures are not immediately surfaced to the API caller since they happen asynchronously. 
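Until such failures are surfaced to callers, one practical workaround is to watch the failed-MCP topic directly. A sketch using plain `kafka-clients`; `FailedMetadataChangeProposal_v1` is the stock topic name, and it, the broker address, and the deserialization details are all deployment-specific assumptions:

```java
import java.time.Duration;
import java.util.List;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

// Sketch: tail the failed-MCP topic to observe asynchronous processing
// failures that the API caller never sees directly.
public final class FailedMcpWatcher {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092"); // deployment-specific
    props.put("group.id", "failed-mcp-watcher");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");

    try (KafkaConsumer<String, byte[]> consumer = new KafkaConsumer<>(props)) {
      consumer.subscribe(List.of("FailedMetadataChangeProposal_v1"));
      while (true) {
        ConsumerRecords<String, byte[]> records = consumer.poll(Duration.ofSeconds(5));
        for (ConsumerRecord<String, byte[]> record : records) {
          // Each record is an Avro-encoded failed MCP; decode it against the
          // schema registry in a real deployment instead of printing sizes.
          System.out.printf("failed MCP at offset %d (%d bytes)%n",
              record.offset(), record.value().length);
        }
      }
    }
  }
}
```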
+ + # Updating DataHub Entity components - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> aspectNames.contains(e.getKeyAspectName())) - .forEach( - e -> { - final String entityName = toUpperFirst(e.getName()); - components.addSchemas( - entityName + ENTITY_REQUEST_SUFFIX, buildEntitySchema(e, aspectNames, false)); - components.addSchemas( - entityName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(e, aspectNames, true)); - components.addSchemas( - "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); - components.addSchemas( - "BatchGet" + entityName + ENTITY_REQUEST_SUFFIX, - buildEntityBatchGetRequestSchema(e, aspectNames)); - }); + definedEntitySpecs.forEach( + e -> { + final String entityName = toUpperFirst(e.getName()); + components.addSchemas( + entityName + ENTITY_REQUEST_SUFFIX, buildEntitySchema(e, aspectNames, false)); + components.addSchemas( + entityName + ENTITY_RESPONSE_SUFFIX, buildEntitySchema(e, aspectNames, true)); + components.addSchemas( + "Scroll" + entityName + ENTITY_RESPONSE_SUFFIX, buildEntityScrollSchema(e)); + components.addSchemas( + "BatchGet" + entityName + ENTITY_REQUEST_SUFFIX, + buildEntityBatchGetRequestSchema(e, aspectNames)); + }); components.addSchemas("SortOrder", new Schema()._enum(List.of("ASCENDING", "DESCENDING"))); // TODO: Correct handling of SystemMetadata and AuditStamp @@ -151,14 +158,12 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { // Parameters // --> Entity Parameters - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getKeyAspectName())) - .forEach( - e -> { - final String parameterName = toUpperFirst(e.getName()) + ASPECTS; - components.addParameters( - parameterName + MODEL_VERSION, buildParameterSchema(e, definitionNames)); - }); + definedEntitySpecs.forEach( + e -> { + final String parameterName = toUpperFirst(e.getName()) + ASPECTS; + components.addParameters( + parameterName + MODEL_VERSION, buildParameterSchema(e, definitionNames)); + }); addExtraParameters(components); @@ -169,39 +174,56 @@ public static OpenAPI generateOpenApiSpec(EntityRegistry entityRegistry) { paths.addPathItem("/v3/entity/scroll", buildGenericListEntitiesPath()); // --> Entity Paths - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getName())) - .sorted(Comparator.comparing(EntitySpec::getName)) - .forEach( - e -> { - paths.addPathItem( - String.format("/v3/entity/%s", e.getName().toLowerCase()), - buildListEntityPath(e)); - paths.addPathItem( - String.format("/v3/entity/%s/batchGet", e.getName().toLowerCase()), - buildBatchGetEntityPath(e)); - paths.addPathItem( - String.format("/v3/entity/%s/{urn}", e.getName().toLowerCase()), - buildSingleEntityPath(e)); - }); + definedEntitySpecs.forEach( + e -> { + paths.addPathItem( + String.format("/v3/entity/%s", e.getName().toLowerCase()), buildListEntityPath(e)); + paths.addPathItem( + String.format("/v3/entity/%s/batchGet", e.getName().toLowerCase()), + buildBatchGetEntityPath(e)); + paths.addPathItem( + String.format("/v3/entity/%s/{urn}", e.getName().toLowerCase()), + buildSingleEntityPath(e)); + }); // --> Aspect Paths - entityRegistry.getEntitySpecs().values().stream() - .filter(e -> definitionNames.contains(e.getName())) - .sorted(Comparator.comparing(EntitySpec::getName)) - .forEach( - e -> { - e.getAspectSpecs().stream() - .filter(a -> definitionNames.contains(a.getName())) - .sorted(Comparator.comparing(AspectSpec::getName)) - .forEach( - a -> - 
paths.addPathItem( - String.format( - "/v3/entity/%s/{urn}/%s", - e.getName().toLowerCase(), a.getName().toLowerCase()), - buildSingleEntityAspectPath(e, a))); - }); + definedEntitySpecs.forEach( + e -> + e.getAspectSpecs().stream() + .filter(a -> definitionNames.contains(a.getName())) + .sorted(Comparator.comparing(AspectSpec::getName)) + .forEach( + a -> + paths.addPathItem( + String.format( + "/v3/entity/%s/{urn}/%s", + e.getName().toLowerCase(), a.getName().toLowerCase()), + buildSingleEntityAspectPath(e, a)))); + + // --> Link & Unlink APIs + if (configurationProvider.getFeatureFlags().isEntityVersioning()) { + definedEntitySpecs.stream() + .filter(entitySpec -> VERSION_SET_ENTITY_NAME.equals(entitySpec.getName())) + .forEach( + entitySpec -> { + paths.addPathItem( + "/v3/entity/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + buildVersioningRelationshipPath()); + }); + } + return new OpenAPI().openapi("3.0.1").info(info).paths(paths).components(components); } @@ -1198,4 +1220,115 @@ private static PathItem buildSingleEntityAspectPath( .post(postOperation) .patch(patchOperation); } + + private static Schema buildVersionPropertiesRequestSchema() { + return new Schema<>() + .type(TYPE_OBJECT) + .description("Properties for creating a version relationship") + .properties( + Map.of( + "comment", + new Schema<>() + .type(TYPE_STRING) + .description("Comment about the version") + .nullable(true), + "label", + new Schema<>() + .type(TYPE_STRING) + .description("Label for the version") + .nullable(true), + "sourceCreationTimestamp", + new Schema<>() + .type(TYPE_INTEGER) + .description("Timestamp when version was created in source system") + .nullable(true), + "sourceCreator", + new Schema<>() + .type(TYPE_STRING) + .description("Creator of version in source system") + .nullable(true))); + } + + private static PathItem buildVersioningRelationshipPath() { + final PathItem result = new PathItem(); + + // Common parameters for both the link (POST) and unlink (DELETE) operations + final List<Parameter> parameters = + List.of( + new Parameter() + .in(NAME_PATH) + .name("versionSetUrn") + .description("The Version Set URN to link to or unlink from") + .required(true) + .schema(new Schema().type(TYPE_STRING)), + new Parameter() + .in(NAME_PATH) + .name("entityUrn") + .description("The Entity URN to be linked or unlinked") + .required(true) + .schema(new Schema().type(TYPE_STRING))); + + // Success response for DELETE + final ApiResponse successDeleteResponse = + new ApiResponse() + .description("Successfully unlinked entity from version set") + .content(new Content().addMediaType("application/json", new MediaType())); + + // DELETE operation + final Operation deleteOperation = + new Operation() + .summary("Unlink an entity from a version set") + .description("Removes the version relationship between an entity and a version set") + .tags(List.of("Version Relationships")) + .parameters(parameters) + .responses( + new ApiResponses() + .addApiResponse("200", successDeleteResponse) + .addApiResponse( + "404", new ApiResponse().description("Version Set or Entity not found"))); + + // Success response for POST + final ApiResponse successPostResponse = + new ApiResponse() + .description("Successfully linked entity to 
version set") + .content( + new Content() + .addMediaType( + "application/json", + new MediaType() + .schema( + new Schema<>() + .$ref( + String.format( + "#/components/schemas/%s%s", + toUpperFirst(VERSION_PROPERTIES_ASPECT_NAME), + ASPECT_RESPONSE_SUFFIX))))); + + // Request body for POST + final RequestBody requestBody = + new RequestBody() + .description("Version properties for the link operation") + .required(true) + .content( + new Content() + .addMediaType( + "application/json", + new MediaType().schema(buildVersionPropertiesRequestSchema()))); + + // POST operation + final Operation postOperation = + new Operation() + .summary("Link an entity to a version set") + .description("Creates a version relationship between an entity and a version set") + .tags(List.of("Version Relationships")) + .parameters(parameters) + .requestBody(requestBody) + .responses( + new ApiResponses() + .addApiResponse("201", successPostResponse) + .addApiResponse( + "404", new ApiResponse().description("Version Set or Entity not found"))); + + return result.delete(deleteOperation).post(postOperation); + } } diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java index af13cd3aab0510..a4583082d57c7f 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/v3/controller/EntityController.java @@ -1,7 +1,9 @@ package io.datahubproject.openapi.v3.controller; +import static com.linkedin.metadata.Constants.VERSION_SET_ENTITY_NAME; import static com.linkedin.metadata.aspect.validation.ConditionalWriteValidator.HTTP_HEADER_IF_VERSION_MATCH; import static com.linkedin.metadata.authorization.ApiOperation.READ; +import static com.linkedin.metadata.authorization.ApiOperation.UPDATE; import com.datahub.authentication.Actor; import com.datahub.authentication.Authentication; @@ -11,22 +13,28 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.ByteString; import com.linkedin.data.template.SetMode; import com.linkedin.data.template.StringMap; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.events.metadata.ChangeType; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.aspect.AspectRetriever; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.BatchItem; import com.linkedin.metadata.aspect.batch.ChangeMCP; import com.linkedin.metadata.entity.EntityApiUtils; import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.RollbackResult; import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; import com.linkedin.metadata.entity.ebean.batch.ChangeItemImpl; import com.linkedin.metadata.entity.ebean.batch.ProposedItem; +import com.linkedin.metadata.entity.versioning.EntityVersioningService; +import com.linkedin.metadata.entity.versioning.VersionPropertiesInput; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import 
com.linkedin.metadata.query.filter.SortCriterion; @@ -71,9 +79,12 @@ import javax.annotation.Nullable; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.MediaType; import org.springframework.http.ResponseEntity; import org.springframework.util.CollectionUtils; +import org.springframework.web.bind.annotation.DeleteMapping; +import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; @@ -89,6 +100,9 @@ public class EntityController extends GenericEntitiesController< GenericAspectV3, GenericEntityV3, GenericEntityScrollResultV3> { + @Autowired private final EntityVersioningService entityVersioningService; + @Autowired private final ConfigurationProvider configurationProvider; + @Tag(name = "Generic Entities") @PostMapping(value = "/{entityName}/batchGet", produces = MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Get a batch of entities") @@ -222,6 +236,111 @@ public ResponseEntity<GenericEntityScrollResultV3> scrollEntities( entityAspectsBody.getAspects() != null)); } + @Tag(name = "EntityVersioning") + @PostMapping( + value = "/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + produces = MediaType.APPLICATION_JSON_VALUE) + @Operation(summary = "Link an Entity to a Version Set as the latest version") + public ResponseEntity<List<GenericEntityV3>> linkLatestVersion( + HttpServletRequest request, + @PathVariable("versionSetUrn") String versionSetUrnString, + @PathVariable("entityUrn") String entityUrnString, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata, + @RequestBody @Nonnull VersionPropertiesInput versionPropertiesInput) + throws URISyntaxException, JsonProcessingException { + + if (!configurationProvider.getFeatureFlags().isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured; please enable it before attempting to use this feature."); + } + Authentication authentication = AuthenticationContext.getAuthentication(); + Urn versionSetUrn = UrnUtils.getUrn(versionSetUrnString); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type Version Set.", versionSetUrnString)); + } + Urn entityUrn = UrnUtils.getUrn(entityUrnString); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + request, + "linkLatestVersion", + ImmutableSet.of(entityUrn.getEntityType(), versionSetUrn.getEntityType())), + authorizationChain, + authentication, + true); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new UnauthorizedException( + String.format( + "%s is unauthorized to %s entities %s and %s", + authentication.getActor().toUrnStr(), UPDATE, versionSetUrnString, entityUrnString)); + } + + return ResponseEntity.ok( + buildEntityList( + opContext, + entityVersioningService.linkLatestVersion( + opContext, versionSetUrn, entityUrn, versionPropertiesInput), + false)); + } + + @Tag(name = "EntityVersioning") + @DeleteMapping( + value = "/versioning/{versionSetUrn}/relationship/versionOf/{entityUrn}", + produces = MediaType.APPLICATION_JSON_VALUE) + 
@Operation(summary = "Unlink the latest linked version of an entity") + public ResponseEntity<List<String>> unlinkVersion( + HttpServletRequest request, + @PathVariable("versionSetUrn") String versionSetUrnString, + @PathVariable("entityUrn") String entityUrnString, + @RequestParam(value = "systemMetadata", required = false, defaultValue = "false") + Boolean withSystemMetadata) + throws URISyntaxException, JsonProcessingException { + + if (!configurationProvider.getFeatureFlags().isEntityVersioning()) { + throw new IllegalAccessError( + "Entity Versioning is not configured; please enable it before attempting to use this feature."); + } + Authentication authentication = AuthenticationContext.getAuthentication(); + Urn versionSetUrn = UrnUtils.getUrn(versionSetUrnString); + if (!VERSION_SET_ENTITY_NAME.equals(versionSetUrn.getEntityType())) { + throw new IllegalArgumentException( + String.format("Version Set urn %s must be of type Version Set.", versionSetUrnString)); + } + Urn entityUrn = UrnUtils.getUrn(entityUrnString); + OperationContext opContext = + OperationContext.asSession( + systemOperationContext, + RequestContext.builder() + .buildOpenapi( + authentication.getActor().toUrnStr(), + request, + "unlinkVersion", + ImmutableSet.of(entityUrn.getEntityType(), versionSetUrn.getEntityType())), + authorizationChain, + authentication, + true); + if (!AuthUtil.isAPIAuthorizedEntityUrns( + opContext, UPDATE, ImmutableSet.of(versionSetUrn, entityUrn))) { + throw new UnauthorizedException( + String.format( + "%s is unauthorized to %s entities %s and %s", + authentication.getActor().toUrnStr(), UPDATE, versionSetUrnString, entityUrnString)); + } + List<RollbackResult> rollbackResults = + entityVersioningService.unlinkVersion(opContext, versionSetUrn, entityUrn); + + return ResponseEntity.ok( + rollbackResults.stream() + .map(rollbackResult -> rollbackResult.getUrn().toString()) + .collect(Collectors.toList())); + } + @Override public GenericEntityScrollResultV3 buildScrollResult( @Nonnull OperationContext opContext, @@ -361,7 +480,10 @@ protected List<GenericEntityV3> buildEntityList( .auditStamp( withSystemMetadata ? 
ingest.getRequest().getAuditStamp() : null) .build())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + // Map merge strategy, just take latest one + .collect( + Collectors.toMap( + Map.Entry::getKey, Map.Entry::getValue, (value1, value2) -> value2)); responseList.add( GenericEntityV3.builder().build(objectMapper, urnAspects.getKey(), aspectsMap)); } diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java index e1568017156d9b..d8f04b60455abb 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/OpenAPIV3GeneratorTest.java @@ -6,6 +6,8 @@ import static org.testng.Assert.assertTrue; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import io.swagger.v3.core.util.Yaml; import io.swagger.v3.oas.models.OpenAPI; @@ -36,8 +38,10 @@ public void testOpenApiSpecBuilder() throws Exception { OpenAPIV3GeneratorTest.class .getClassLoader() .getResourceAsStream("entity-registry.yml")); + ConfigurationProvider configurationProvider = new ConfigurationProvider(); + configurationProvider.setFeatureFlags(new FeatureFlags()); - OpenAPI openAPI = OpenAPIV3Generator.generateOpenApiSpec(er); + OpenAPI openAPI = OpenAPIV3Generator.generateOpenApiSpec(er, configurationProvider); String openapiYaml = Yaml.pretty(openAPI); Files.write( Path.of(getClass().getResource("/").getPath(), "open-api.yaml"), diff --git a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java index 952dc31c5ba386..e82ab50a0defeb 100644 --- a/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/io/datahubproject/openapi/v3/controller/EntityControllerTest.java @@ -33,9 +33,12 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.RecordTemplate; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.dataset.DatasetProfile; import com.linkedin.entity.Aspect; import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.gms.factory.config.ConfigurationProvider; +import com.linkedin.gms.factory.entity.versioning.EntityVersioningServiceFactory; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceImpl; @@ -57,6 +60,7 @@ import io.datahubproject.openapi.config.SpringWebConfig; import io.datahubproject.openapi.exception.InvalidUrnException; import io.datahubproject.test.metadata.context.TestOperationContexts; +import jakarta.servlet.ServletException; import java.util.Collections; import java.util.List; import java.util.Map; @@ -81,7 +85,11 @@ @SpringBootTest(classes = {SpringWebConfig.class}) @ComponentScan(basePackages = {"io.datahubproject.openapi.v3.controller"}) -@Import({SpringWebConfig.class, 
EntityControllerTest.EntityControllerTestConfig.class}) +@Import({ + SpringWebConfig.class, + EntityControllerTest.EntityControllerTestConfig.class, + EntityVersioningServiceFactory.class +}) @AutoConfigureWebMvc @AutoConfigureMockMvc public class EntityControllerTest extends AbstractTestNGSpringContextTests { @@ -92,6 +100,7 @@ public class EntityControllerTest extends AbstractTestNGSpringContextTests { @Autowired private TimeseriesAspectService mockTimeseriesAspectService; @Autowired private EntityRegistry entityRegistry; @Autowired private OperationContext opContext; + @MockBean private ConfigurationProvider configurationProvider; @Test public void initTest() { @@ -431,4 +440,211 @@ public TimeseriesAspectService timeseriesAspectService() { return timeseriesAspectService; } } + + @Test + public void testGetEntityBatchWithMultipleEntities() throws Exception { + List<Urn> TEST_URNS = + List.of( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"), + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,2,PROD)")); + + // Mock entity aspect response + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))), + TEST_URNS.get(1), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + String requestBody = + String.format( + "[{\"urn\": \"%s\"}, {\"urn\": \"%s\"}]", + TEST_URNS.get(0).toString(), TEST_URNS.get(1).toString()); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/dataset/batchGet") + .content(requestBody) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect(MockMvcResultMatchers.jsonPath("$[0].urn").value(TEST_URNS.get(0).toString())) + .andExpect(MockMvcResultMatchers.jsonPath("$[1].urn").value(TEST_URNS.get(1).toString())); + } + + @Test(expectedExceptions = ServletException.class) + public void testGetEntityBatchWithInvalidUrn() throws Exception { + String requestBody = "[{\"urn\": \"invalid:urn\"}]"; + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/dataset/batchGet") + .content(requestBody) + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + } + + @Test + public void testScrollEntitiesWithMultipleSortFields() throws Exception { + List<Urn> TEST_URNS = + List.of( + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"), + UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,2,PROD)")); + + ScrollResult expectedResult = + new ScrollResult() + .setEntities( + new SearchEntityArray( + List.of( + new SearchEntity().setEntity(TEST_URNS.get(0)), + new SearchEntity().setEntity(TEST_URNS.get(1))))); + + when(mockSearchService.scrollAcrossEntities( + any(OperationContext.class), + eq(List.of("dataset")), + anyString(), + nullable(Filter.class), + any(), + nullable(String.class), + nullable(String.class), + anyInt())) + .thenReturn(expectedResult); + + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/scroll") + 
.content("{\"entities\":[\"dataset\"]}") + .param("sortCriteria", "name", "urn") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect( + MockMvcResultMatchers.jsonPath("$.entities[0].urn").value(TEST_URNS.get(0).toString())); + } + + @Test + public void testScrollEntitiesWithPitKeepAlive() throws Exception { + List<Urn> TEST_URNS = + List.of(UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)")); + + ScrollResult expectedResult = + new ScrollResult() + .setEntities( + new SearchEntityArray(List.of(new SearchEntity().setEntity(TEST_URNS.get(0))))) + .setScrollId("test-scroll-id"); + + when(mockSearchService.scrollAcrossEntities( + any(OperationContext.class), + eq(List.of("dataset")), + anyString(), + nullable(Filter.class), + any(), + nullable(String.class), + eq("10m"), + anyInt())) + .thenReturn(expectedResult); + + when(mockEntityService.getEnvelopedVersionedAspects( + any(OperationContext.class), anyMap(), eq(false))) + .thenReturn( + Map.of( + TEST_URNS.get(0), + List.of( + new EnvelopedAspect() + .setName("status") + .setValue(new Aspect(new Status().data()))))); + + mockMvc + .perform( + MockMvcRequestBuilders.post("/v3/entity/scroll") + .content("{\"entities\":[\"dataset\"]}") + .param("pitKeepAlive", "10m") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is2xxSuccessful()) + .andExpect(MockMvcResultMatchers.jsonPath("$.scrollId").value("test-scroll-id")); + } + + @Test(expectedExceptions = ServletException.class) + public void testEntityVersioningFeatureFlagDisabled() throws Exception { + Urn TEST_URN = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"); + Urn VERSION_SET_URN = UrnUtils.getUrn("urn:li:versionSet:test-version-set"); + + FeatureFlags mockFeatureFlags = mock(FeatureFlags.class); + when(configurationProvider.getFeatureFlags()).thenReturn(mockFeatureFlags); + when(mockFeatureFlags.isEntityVersioning()).thenReturn(false); + + // Test linking version with disabled flag + mockMvc + .perform( + MockMvcRequestBuilders.post( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + VERSION_SET_URN, TEST_URN)) + .content("{}") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + + // Test unlinking version with disabled flag + mockMvc + .perform( + MockMvcRequestBuilders.delete( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + VERSION_SET_URN, TEST_URN)) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + } + + @Test(expectedExceptions = ServletException.class) + public void testInvalidVersionSetUrn() throws Exception { + Urn TEST_URN = UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:testPlatform,1,PROD)"); + String INVALID_VERSION_SET_URN = "urn:li:dataset:invalid-version-set"; + + FeatureFlags mockFeatureFlags = mock(FeatureFlags.class); + when(configurationProvider.getFeatureFlags()).thenReturn(mockFeatureFlags); + when(mockFeatureFlags.isEntityVersioning()).thenReturn(true); + + // Test linking with invalid version set URN + mockMvc + .perform( + MockMvcRequestBuilders.post( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + INVALID_VERSION_SET_URN, TEST_URN)) + .content("{}") + .contentType(MediaType.APPLICATION_JSON) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + + // Test 
unlinking with invalid version set URN + mockMvc + .perform( + MockMvcRequestBuilders.delete( + String.format( + "/v3/entity/versioning/%s/relationship/versionOf/%s", + INVALID_VERSION_SET_URN, TEST_URN)) + .accept(MediaType.APPLICATION_JSON)) + .andExpect(status().is4xxClientError()); + } } diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 1c713fd33884b5..af11532ccf4ece 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -1382,6 +1382,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", @@ -3827,7 +3831,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -4005,37 +4045,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -4213,7 +4222,7 @@ }, "doc" : "The order to sort the results i.e. 
ASCENDING or DESCENDING" } ] - }, "com.linkedin.metadata.query.filter.SortOrder", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.mxe.GenericAspect", { + }, "com.linkedin.metadata.query.filter.SortOrder", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.mxe.GenericAspect", { "type" : "record", "name" : "MetadataChangeProposal", "namespace" : "com.linkedin.mxe", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 77d4644f3c121a..f58d83dd1e5cb7 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -1409,6 +1409,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, "com.linkedin.common.fieldtransformer.TransformationType", "com.linkedin.common.fieldtransformer.UDFTransformer", { "type" : "record", @@ -3985,7 +3989,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + 
}, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -4163,37 +4203,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -5004,7 +5013,7 @@ "name" : "MLModelGroupProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with an ML Model Group\r", - "include" : [ "com.linkedin.common.CustomProperties" ], + "include" : [ "com.linkedin.common.CustomProperties", "MLModelLineageInfo" ], "fields" : [ { "name" : "name", "type" : "string", @@ -5041,21 +5050,6 @@ "type" : "com.linkedin.common.TimeStamp", "doc" : "Date when the MLModelGroup was last modified\r", "optional" : true - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model group. 
Visible in Lineage.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", @@ -6149,6 +6143,12 @@ "doc" : "Include default facets when getting facets to aggregate on in search requests.\nBy default we include these, but custom aggregation requests don't need them.", "default" : true, "optional" : true + }, { + "name" : "filterNonLatestVersions", + "type" : "boolean", + "doc" : "Include only latest versions in version sets, default true", + "default" : true, + "optional" : true } ] }, { "type" : "enum", @@ -6700,7 +6700,7 @@ "type" : "int", "doc" : "The total number of entities directly under searched path" } ] - }, "com.linkedin.metadata.search.SearchResultMetadata", "com.linkedin.metadata.search.SearchSuggestion", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { + }, "com.linkedin.metadata.search.SearchResultMetadata", "com.linkedin.metadata.search.SearchSuggestion", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", 
"com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { "type" : "record", "name" : "SystemMetadata", "namespace" : "com.linkedin.mxe", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 8b6def75f7a665..61c31f93987b88 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -1115,6 +1115,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", @@ -3551,7 +3555,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances 
(if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -3729,37 +3769,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -4002,7 +4011,7 @@ } } } ] - }, "com.linkedin.metadata.run.UnsafeEntityInfo", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", 
"com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], + }, "com.linkedin.metadata.run.UnsafeEntityInfo", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], "schema" : { "name" : "runs", "namespace" : "com.linkedin.entity", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index e4cc5c42303ee2..75793be7331da4 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -1115,6 +1115,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, { "type" : "record", @@ -3545,7 +3549,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - 
"include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -3723,37 +3763,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -3908,7 +3917,7 @@ "name" : "version", "type" : "long" } ] - }, "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", 
"com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties", { + }, "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", 
"com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties", { "type" : "record", "name" : "TimeseriesIndexSizeResult", "namespace" : "com.linkedin.timeseries", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index e375ac698ab516..58ba2ad05dfe74 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -1409,6 +1409,10 @@ "name" : "versionTag", "type" : "string", "optional" : true + }, { + "name" : "metadataAttribution", + "type" : "MetadataAttribution", + "optional" : true } ] }, "com.linkedin.common.fieldtransformer.TransformationType", "com.linkedin.common.fieldtransformer.UDFTransformer", { "type" : "record", @@ -3979,7 +3983,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -4157,37 +4197,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -4998,7 +5007,7 @@ "name" : "MLModelGroupProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with an ML Model Group\r", - "include" : [ "com.linkedin.common.CustomProperties" ], + "include" : [ "com.linkedin.common.CustomProperties", "MLModelLineageInfo" ], "fields" : [ { "name" : "name", "type" : "string", @@ -5035,21 +5044,6 @@ "type" : "com.linkedin.common.TimeStamp", "doc" : "Date when the MLModelGroup was last modified\r", "optional" : true - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model group. Visible in Lineage.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", @@ -5844,7 +5838,7 @@ } ] } } ] - }, "com.linkedin.glossary.GlossaryNodeInfo", "com.linkedin.glossary.GlossaryRelatedTerms", "com.linkedin.glossary.GlossaryTermInfo", "com.linkedin.identity.CorpGroupInfo", "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", "com.linkedin.identity.CorpUserStatus", "com.linkedin.identity.GroupMembership", "com.linkedin.metadata.aspect.ChartAspect", "com.linkedin.metadata.aspect.CorpGroupAspect", "com.linkedin.metadata.aspect.CorpUserAspect", "com.linkedin.metadata.aspect.DashboardAspect", "com.linkedin.metadata.aspect.DataFlowAspect", "com.linkedin.metadata.aspect.DataHubPolicyAspect", "com.linkedin.metadata.aspect.DataHubRetentionAspect", "com.linkedin.metadata.aspect.DataJobAspect", "com.linkedin.metadata.aspect.DataPlatformAspect", "com.linkedin.metadata.aspect.DataProcessAspect", "com.linkedin.metadata.aspect.DatasetAspect", "com.linkedin.metadata.aspect.GlossaryNodeAspect", "com.linkedin.metadata.aspect.GlossaryTermAspect", "com.linkedin.metadata.aspect.MLFeatureAspect", "com.linkedin.metadata.aspect.MLFeatureTableAspect", "com.linkedin.metadata.aspect.MLModelAspect", "com.linkedin.metadata.aspect.MLModelDeploymentAspect", "com.linkedin.metadata.aspect.MLModelGroupAspect", "com.linkedin.metadata.aspect.MLPrimaryKeyAspect", "com.linkedin.metadata.aspect.SchemaFieldAspect", "com.linkedin.metadata.aspect.TagAspect", "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataHubPolicyKey", "com.linkedin.metadata.key.DataHubRetentionKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.DataPlatformKey", "com.linkedin.metadata.key.DataProcessKey", "com.linkedin.metadata.key.DatasetKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", 
"com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLFeatureTableKey", "com.linkedin.metadata.key.MLModelDeploymentKey", "com.linkedin.metadata.key.MLModelGroupKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.MLPrimaryKeyKey", "com.linkedin.metadata.key.SchemaFieldKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { + }, "com.linkedin.glossary.GlossaryNodeInfo", "com.linkedin.glossary.GlossaryRelatedTerms", "com.linkedin.glossary.GlossaryTermInfo", "com.linkedin.identity.CorpGroupInfo", "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", "com.linkedin.identity.CorpUserStatus", "com.linkedin.identity.GroupMembership", "com.linkedin.metadata.aspect.ChartAspect", "com.linkedin.metadata.aspect.CorpGroupAspect", "com.linkedin.metadata.aspect.CorpUserAspect", "com.linkedin.metadata.aspect.DashboardAspect", "com.linkedin.metadata.aspect.DataFlowAspect", "com.linkedin.metadata.aspect.DataHubPolicyAspect", "com.linkedin.metadata.aspect.DataHubRetentionAspect", "com.linkedin.metadata.aspect.DataJobAspect", "com.linkedin.metadata.aspect.DataPlatformAspect", "com.linkedin.metadata.aspect.DataProcessAspect", 
"com.linkedin.metadata.aspect.DatasetAspect", "com.linkedin.metadata.aspect.GlossaryNodeAspect", "com.linkedin.metadata.aspect.GlossaryTermAspect", "com.linkedin.metadata.aspect.MLFeatureAspect", "com.linkedin.metadata.aspect.MLFeatureTableAspect", "com.linkedin.metadata.aspect.MLModelAspect", "com.linkedin.metadata.aspect.MLModelDeploymentAspect", "com.linkedin.metadata.aspect.MLModelGroupAspect", "com.linkedin.metadata.aspect.MLPrimaryKeyAspect", "com.linkedin.metadata.aspect.SchemaFieldAspect", "com.linkedin.metadata.aspect.TagAspect", "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataHubPolicyKey", "com.linkedin.metadata.key.DataHubRetentionKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.DataPlatformKey", "com.linkedin.metadata.key.DataProcessKey", "com.linkedin.metadata.key.DatasetKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLFeatureTableKey", "com.linkedin.metadata.key.MLModelDeploymentKey", "com.linkedin.metadata.key.MLModelGroupKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.MLPrimaryKeyKey", "com.linkedin.metadata.key.SchemaFieldKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", 
"com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { "type" : "record", "name" : "GenericPayload", "namespace" : "com.linkedin.mxe", diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java new file mode 100644 index 00000000000000..9e82efa913a98d --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/EntityVersioningService.java @@ -0,0 +1,36 @@ +package com.linkedin.metadata.entity.versioning; + +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.entity.IngestResult; +import com.linkedin.metadata.entity.RollbackResult; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; + +public interface EntityVersioningService { + + /** + * Generates a new set of VersionProperties for the latest version and links it to the specified + * version set. If the specified version set does not yet exist, will create it. Order of + * operations here is important: 1. Create initial Version Set if necessary, do not generate + * Version Set Properties 2. Create Version Properties for specified entity. 3. Generate version + * properties with the properly set latest version Will eventually want to add in the scheme here + * as a parameter + * + * @return ingestResult -> the results of the ingested linked version + */ + List linkLatestVersion( + OperationContext opContext, + Urn versionSet, + Urn newLatestVersion, + VersionPropertiesInput inputProperties); + + /** + * Unlinks the latest version from a version set. Will attempt to set up the previous version as + * the new latest. This fully removes the version properties and unversions the specified entity. 
+ * + * @param opContext operational context containing various information about the current execution + * @param currentLatest the currently linked latest versioned entity urn + * @return the deletion result + */ + List<RollbackResult> unlinkVersion(OperationContext opContext, Urn versionSet, Urn currentLatest); +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java new file mode 100644 index 00000000000000..28c320ec717201 --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/versioning/VersionPropertiesInput.java @@ -0,0 +1,20 @@ +package com.linkedin.metadata.entity.versioning; + +import com.fasterxml.jackson.annotation.JsonInclude; +import lombok.AccessLevel; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +@Data +@Builder +@JsonInclude(JsonInclude.Include.NON_NULL) +@NoArgsConstructor(force = true, access = AccessLevel.PRIVATE) +@AllArgsConstructor +public class VersionPropertiesInput { + private String comment; + private String version; + private Long sourceCreationTimestamp; + private String sourceCreator; +}
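A minimal, hypothetical usage sketch for the two new versioning classes above. The service instance (versioningService), the OperationContext (opContext), and all URNs are illustrative assumptions, not part of this change:

    // Sketch only: assumes an EntityVersioningService implementation and an
    // OperationContext are available in scope; URNs are made up for illustration.
    VersionPropertiesInput input =
        VersionPropertiesInput.builder()
            .version("v1.0.0") // version label to record on the linked entity
            .comment("Initial release")
            .sourceCreationTimestamp(System.currentTimeMillis())
            .sourceCreator("urn:li:corpuser:datahub")
            .build();

    Urn versionSet = UrnUtils.getUrn("urn:li:versionSet:(myVersionSet,dataset)");
    Urn newLatest =
        UrnUtils.getUrn("urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)");

    // Creates the version set if it does not exist yet, then links newLatest as its latest.
    List<IngestResult> linkResults =
        versioningService.linkLatestVersion(opContext, versionSet, newLatest, input);

    // Removes the version properties again and attempts to promote the previous version.
    List<RollbackResult> unlinkResults =
        versioningService.unlinkVersion(opContext, versionSet, newLatest);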
diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java index 7e9d1701bf79a9..4cd9ec6c75b786 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java @@ -87,6 +87,14 @@ public static Filter newDisjunctiveFilter(@Nonnull Criterion... orCriterion) { .collect(Collectors.toCollection(ConjunctiveCriterionArray::new))); } + @Nonnull + public static Filter newConjunctiveFilter(@Nonnull Criterion... andCriterion) { + ConjunctiveCriterionArray orCriteria = new ConjunctiveCriterionArray(); + orCriteria.add( + new ConjunctiveCriterion().setAnd(new CriterionArray(Arrays.asList(andCriterion)))); + return new Filter().setOr(orCriteria); + } + @Nonnull public static ConjunctiveCriterion add( @Nonnull ConjunctiveCriterion conjunctiveCriterion, @Nonnull Criterion element) {
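A small, hypothetical usage sketch for the newConjunctiveFilter helper added above; the field names and values are illustrative, and Criterion, Condition, and Filter are the existing classes from com.linkedin.metadata.query.filter:

    // Builds a Filter matching platform == kafka AND origin == PROD. Filter's top
    // level is a disjunction of conjunctions, so the helper wraps the single AND
    // clause in a one-element OR array.
    Criterion platformCriterion =
        new Criterion()
            .setField("platform")
            .setCondition(Condition.EQUAL)
            .setValue("urn:li:dataPlatform:kafka");
    Criterion originCriterion =
        new Criterion().setField("origin").setCondition(Condition.EQUAL).setValue("PROD");
    Filter filter = QueryUtils.newConjunctiveFilter(platformCriterion, originCriterion);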
diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java index 80a11ab98bbf4a..3c623f8df7c1bf 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/authorization/PoliciesConfig.java @@ -749,6 +749,14 @@ public class PoliciesConfig { EDIT_ENTITY_TAGS_PRIVILEGE, EDIT_ENTITY_GLOSSARY_TERMS_PRIVILEGE)); + // Version Set privileges + public static final ResourcePrivileges VERSION_SET_PRIVILEGES = + ResourcePrivileges.of( + "versionSet", + "Version Set", + "A logical collection of versioned entities.", + COMMON_ENTITY_PRIVILEGES); + public static final List<ResourcePrivileges> ENTITY_RESOURCE_PRIVILEGES = ImmutableList.of( DATASET_PRIVILEGES, @@ -767,7 +775,8 @@ public class PoliciesConfig { DATA_PRODUCT_PRIVILEGES, ER_MODEL_RELATIONSHIP_PRIVILEGES, BUSINESS_ATTRIBUTE_PRIVILEGES, - STRUCTURED_PROPERTIES_PRIVILEGES); + STRUCTURED_PROPERTIES_PRIVILEGES, + VERSION_SET_PRIVILEGES); // Merge all entity specific resource privileges to create a superset of all resource privileges public static final ResourcePrivileges ALL_RESOURCE_PRIVILEGES = diff --git a/settings.gradle b/settings.gradle index 77d0706549a439..437a353f210ac4 100644 --- a/settings.gradle +++ b/settings.gradle @@ -79,6 +79,20 @@ include ':metadata-service:openapi-servlet:models' include ':metadata-integration:java:datahub-schematron:lib' include ':metadata-integration:java:datahub-schematron:cli' +buildCache { + def depotSecret = System.getenv('DEPOT_TOKEN'); + + remote(HttpBuildCache) { + url = 'https://cache.depot.dev' + enabled = depotSecret != null + push = true + credentials { + username = '' + password = depotSecret + } + } +} + def installPreCommitHooks() { def preCommitInstalled = false try { @@ -116,7 +130,7 @@ def installPreCommitHooks() { def stderr = new StringBuilder() installHooksProcess.waitForProcessOutput(stdout, stderr) if (installHooksProcess.exitValue() != 0) { - println "Failed to install hooks: ${stderr}" + println "Failed to install hooks: ${stdout}" return } println "Hooks output: ${stdout}" diff --git a/smoke-test/tests/data_process_instance/__init__.py b/smoke-test/tests/data_process_instance/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/data_process_instance/test_data_process_instance.py b/smoke-test/tests/data_process_instance/test_data_process_instance.py new file mode 100644 index 00000000000000..a8aca6034d5be1 --- /dev/null +++ b/smoke-test/tests/data_process_instance/test_data_process_instance.py @@ -0,0 +1,293 @@ +import logging +import os +import tempfile +from random import randint + +import pytest +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext, RecordEnvelope +from datahub.ingestion.api.sink import NoopWriteCallback +from datahub.ingestion.sink.file import FileSink, FileSinkConfig +from datahub.metadata.schema_classes import ( + AuditStampClass, + ContainerClass, + ContainerPropertiesClass, + DataPlatformInstanceClass, + DataPlatformInstancePropertiesClass, + DataProcessInstanceKeyClass, + DataProcessInstancePropertiesClass, + DataProcessInstanceRunEventClass, + MLHyperParamClass, + MLMetricClass, + MLTrainingRunPropertiesClass, + SubTypesClass, + TimeWindowSizeClass, +) + +from tests.utils import ( + delete_urns_from_file, + ingest_file_via_rest, + wait_for_writes_to_sync, +) + +logger = logging.getLogger(__name__) + +# Generate unique DPI ID +dpi_id = f"test-pipeline-run-{randint(1000, 9999)}" +dpi_urn = f"urn:li:dataProcessInstance:{dpi_id}" + + +class FileEmitter: + def __init__(self, filename: str) -> None: + self.sink: FileSink = FileSink( + ctx=PipelineContext(run_id="create_test_data"), + config=FileSinkConfig(filename=filename), + ) + + def emit(self, event): + self.sink.write_record_async( + record_envelope=RecordEnvelope(record=event, metadata={}), + write_callback=NoopWriteCallback(), + ) + + def close(self): + self.sink.close() + + +def create_test_data(filename: str): + mcps = [ + # Key aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataProcessInstanceKey", + aspect=DataProcessInstanceKeyClass(id=dpi_id), + ), + # Properties aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataProcessInstanceProperties", + aspect=DataProcessInstancePropertiesClass( + name="Test Pipeline Run", + type="BATCH_SCHEDULED", + created=AuditStampClass( + time=1640692800000, actor="urn:li:corpuser:datahub" + ), + ), + ), + # Run Event aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataProcessInstanceRunEvent", + aspect=DataProcessInstanceRunEventClass( + timestampMillis=1704067200000, + eventGranularity=TimeWindowSizeClass(unit="WEEK", multiple=1), + status="COMPLETE", + ), + ), + # Platform Instance aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataPlatformInstance", + aspect=DataPlatformInstanceClass( + platform="urn:li:dataPlatform:airflow", + instance="urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,1234567890)", + ), + ), + MetadataChangeProposalWrapper( + entityType="dataPlatformInstance", + entityUrn="urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,1234567890)", + aspectName="dataPlatformInstanceProperties", + aspect=DataPlatformInstancePropertiesClass( + name="my process instance", + ), + ), + # SubTypes aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="subTypes", + aspect=SubTypesClass(typeNames=["TEST", "BATCH_JOB"]), + ), + # Container aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="container", + aspect=ContainerClass(container="urn:li:container:testGroup1"), + ), + MetadataChangeProposalWrapper( + entityType="container", + entityUrn="urn:li:container:testGroup1", + aspectName="containerProperties", + aspect=ContainerPropertiesClass(name="testGroup1"), + ), + # ML Training Run Properties aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="mlTrainingRunProperties", + aspect=MLTrainingRunPropertiesClass( + id="test-training-run-123", + trainingMetrics=[ + MLMetricClass( + name="accuracy", + description="accuracy of the model", + value="0.95", + ), + MLMetricClass( + name="loss", + description="accuracy loss of the model", + 
value="0.05", + ), + ], + hyperParams=[ + MLHyperParamClass( + name="learningRate", + description="rate of learning", + value="0.001", + ), + MLHyperParamClass( + name="batchSize", description="size of the batch", value="32" + ), + ], + outputUrls=["s3://my-bucket/ml/output"], + ), + ), + ] + + file_emitter = FileEmitter(filename) + for mcp in mcps: + file_emitter.emit(mcp) + file_emitter.close() + + +@pytest.fixture(scope="module", autouse=False) +def ingest_cleanup_data(auth_session, graph_client, request): + new_file, filename = tempfile.mkstemp(suffix=".json") + try: + create_test_data(filename) + print("ingesting data process instance test data") + ingest_file_via_rest(auth_session, filename) + wait_for_writes_to_sync() + yield + print("removing data process instance test data") + delete_urns_from_file(graph_client, filename) + wait_for_writes_to_sync() + finally: + os.remove(filename) + + +@pytest.mark.integration +def test_search_dpi(auth_session, ingest_cleanup_data): + """Test DPI search and validation of returned fields using GraphQL.""" + + json = { + "query": """query scrollAcrossEntities($input: ScrollAcrossEntitiesInput!) { + scrollAcrossEntities(input: $input) { + nextScrollId + count + total + searchResults { + entity { + ... on DataProcessInstance { + urn + properties { + name + externalUrl + } + dataPlatformInstance { + platform { + urn + name + } + } + subTypes { + typeNames + } + container { + urn + } + platform { + urn + name + properties { + type + } + } + mlTrainingRunProperties { + id + trainingMetrics { + name + value + } + hyperParams { + name + value + } + outputUrls + } + } + } + } + } + }""", + "variables": { + "input": {"types": ["DATA_PROCESS_INSTANCE"], "query": dpi_id, "count": 10} + }, + } + + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) + response.raise_for_status() + res_data = response.json() + + # Basic response structure validation + assert res_data, "Response should not be empty" + assert "data" in res_data, "Response should contain 'data' field" + print("RESPONSE DATA:" + str(res_data)) + assert ( + "scrollAcrossEntities" in res_data["data"] + ), "Response should contain 'scrollAcrossEntities' field" + + search_results = res_data["data"]["scrollAcrossEntities"] + assert ( + "searchResults" in search_results + ), "Response should contain 'searchResults' field" + + results = search_results["searchResults"] + assert len(results) > 0, "Should find at least one result" + + # Find our test entity + test_entity = None + for result in results: + if result["entity"]["urn"] == dpi_urn: + test_entity = result["entity"] + break + + assert test_entity is not None, f"Should find test entity with URN {dpi_urn}" + + # Validate fields + props = test_entity["properties"] + assert props["name"] == "Test Pipeline Run" + + platform_instance = test_entity["dataPlatformInstance"] + assert platform_instance["platform"]["urn"] == "urn:li:dataPlatform:airflow" + + sub_types = test_entity["subTypes"] + assert set(sub_types["typeNames"]) == {"TEST", "BATCH_JOB"} + + container = test_entity["container"] + assert container["urn"] == "urn:li:container:testGroup1" + + ml_props = test_entity["mlTrainingRunProperties"] + assert ml_props["id"] == "test-training-run-123" + assert ml_props["trainingMetrics"][0] == {"name": "accuracy", "value": "0.95"} + assert ml_props["trainingMetrics"][1] == {"name": "loss", "value": "0.05"} + assert ml_props["hyperParams"][0] == {"name": "learningRate", "value": "0.001"} + assert 
ml_props["hyperParams"][1] == {"name": "batchSize", "value": "32"} + assert ml_props["outputUrls"][0] == "s3://my-bucket/ml/output" diff --git a/smoke-test/tests/entity_versioning/__init__.py b/smoke-test/tests/entity_versioning/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/entity_versioning/test_versioning.py b/smoke-test/tests/entity_versioning/test_versioning.py new file mode 100644 index 00000000000000..c331cc5305a336 --- /dev/null +++ b/smoke-test/tests/entity_versioning/test_versioning.py @@ -0,0 +1,64 @@ +import pytest + + +@pytest.fixture(scope="module", autouse=True) +def test_link_unlink_version(auth_session): + """Fixture to execute setup before and tear down after all tests are run""" + res_data = link_version(auth_session) + + assert res_data + assert res_data["data"] + assert res_data["data"]["linkAssetVersion"] + assert ( + res_data["data"]["linkAssetVersion"] + == "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)" + ) + + res_data = unlink_version(auth_session) + + assert res_data + assert res_data["data"] + assert res_data["data"]["unlinkAssetVersion"] + + +def link_version(auth_session): + json = { + "mutation": """mutation linkAssetVersion($input: LinkVersionInput!) {\n + linkAssetVersion(input: $input) + }\n + }""", + "variables": { + "input": { + "version": "1233456", + "versionSet": "urn:li:versionSet:(12345678910,dataset)", + "linkedEntity": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + } + }, + } + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) + response.raise_for_status() + + return response.json() + + +def unlink_version(auth_session): + json = { + "mutation": """mutation unlinkAssetVersion($input: UnlinkVersionInput!) 
{ + unlinkAssetVersion(input: $input) + }""", + "variables": { + "input": { + "versionSet": "urn:li:versionSet:(12345678910,dataset)", + "unlinkedEntity": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", + } + }, + } + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) + response.raise_for_status() + + return response.json() diff --git a/smoke-test/tests/ml_models/__init__.py b/smoke-test/tests/ml_models/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/ml_models/test_ml_models.py b/smoke-test/tests/ml_models/test_ml_models.py new file mode 100644 index 00000000000000..59821ab3e3cc41 --- /dev/null +++ b/smoke-test/tests/ml_models/test_ml_models.py @@ -0,0 +1,133 @@ +import logging +import os +import tempfile +from random import randint + +import pytest +from datahub.emitter.mce_builder import make_ml_model_group_urn, make_ml_model_urn +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext, RecordEnvelope +from datahub.ingestion.api.sink import NoopWriteCallback +from datahub.ingestion.graph.client import DataHubGraph +from datahub.ingestion.sink.file import FileSink, FileSinkConfig +from datahub.metadata.schema_classes import ( + MLModelGroupPropertiesClass, + MLModelPropertiesClass, +) + +from tests.utils import ( + delete_urns_from_file, + ingest_file_via_rest, + wait_for_writes_to_sync, +) + +logger = logging.getLogger(__name__) + +# Generate unique model names for testing +start_index = randint(10, 10000) +model_names = [f"test_model_{i}" for i in range(start_index, start_index + 3)] +model_group_urn = make_ml_model_group_urn("workbench", "test_group", "DEV") +model_urns = [make_ml_model_urn("workbench", name, "DEV") for name in model_names] + + +# Writes MCPs to a local JSON file that can then be ingested via the REST API. +class FileEmitter: + def __init__(self, filename: str) -> None: + self.sink: FileSink = FileSink( + ctx=PipelineContext(run_id="create_test_data"), + config=FileSinkConfig(filename=filename), + ) + + def emit(self, event): + self.sink.write_record_async( + record_envelope=RecordEnvelope(record=event, metadata={}), + write_callback=NoopWriteCallback(), + ) + + def close(self): + self.sink.close() + + +def create_test_data(filename: str): + # Create model group + model_group_mcp = MetadataChangeProposalWrapper( + entityUrn=str(model_group_urn), + aspect=MLModelGroupPropertiesClass( + description="Test model group for integration testing", + trainingJobs=["urn:li:dataProcessInstance:test_job"], + ), + ) + + # Create models that belong to the group + model_mcps = [ + MetadataChangeProposalWrapper( + entityUrn=model_urn, + aspect=MLModelPropertiesClass( + name=f"Test Model ({model_urn})", + description=f"Test model {model_urn}", + groups=[str(model_group_urn)], + trainingJobs=["urn:li:dataProcessInstance:test_job"], + ), + ) + for model_urn in model_urns + ] + + file_emitter = FileEmitter(filename) + for mcp in [model_group_mcp] + model_mcps: + file_emitter.emit(mcp) + + file_emitter.close() + + +@pytest.fixture(scope="module", autouse=False) +def ingest_cleanup_data(auth_session, graph_client, request): + new_file, filename = tempfile.mkstemp(suffix=".json") + try: + create_test_data(filename) + print("ingesting ml model test data") + ingest_file_via_rest(auth_session, filename) + wait_for_writes_to_sync() + yield + print("removing ml model test data") + delete_urns_from_file(graph_client, 
filename) + wait_for_writes_to_sync() + finally: + os.remove(filename) + + +@pytest.mark.integration +def test_create_ml_models(graph_client: DataHubGraph, ingest_cleanup_data): + """Test creation and validation of ML models and model groups.""" + + # Validate model group properties + fetched_group_props = graph_client.get_aspect( + str(model_group_urn), MLModelGroupPropertiesClass + ) + assert fetched_group_props is not None + assert fetched_group_props.description == "Test model group for integration testing" + assert fetched_group_props.trainingJobs == ["urn:li:dataProcessInstance:test_job"] + + # Validate individual models + for model_urn in model_urns: + fetched_model_props = graph_client.get_aspect(model_urn, MLModelPropertiesClass) + assert fetched_model_props is not None + assert fetched_model_props.name == f"Test Model ({model_urn})" + assert fetched_model_props.description == f"Test model {model_urn}" + assert str(model_group_urn) in (fetched_model_props.groups or []) + assert fetched_model_props.trainingJobs == [ + "urn:li:dataProcessInstance:test_job" + ] + + # Validate relationships between models and group + related_models = set() + for e in graph_client.get_related_entities( + str(model_group_urn), + relationship_types=["MemberOf"], + direction=DataHubGraph.RelationshipDirection.INCOMING, + ): + related_models.add(e.urn) + + assert set(model_urns) == related_models diff --git a/test-models/build.gradle b/test-models/build.gradle index e8733f0525870b..89bf4ec445440d 100644 --- a/test-models/build.gradle +++ b/test-models/build.gradle @@ -18,3 +18,4 @@ idea { } sourceSets.mainGeneratedDataTemplate.java.srcDirs('src/main/javaPegasus/') +spotlessJava.dependsOn generateTestDataTemplate \ No newline at end of file
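
Reviewer note: the two new smoke-test modules define byte-for-byte identical FileEmitter helpers. A possible follow-up is sketched below; it assumes a shared home such as smoke-test/tests/utils.py (that location is hypothetical, as is the write_mcps_to_file name) and uses only the datahub APIs both modules already import.

# Sketch of a shared helper (hypothetical); both test modules could import
# this instead of redefining FileEmitter locally.
from typing import List

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
from datahub.ingestion.api.sink import NoopWriteCallback
from datahub.ingestion.sink.file import FileSink, FileSinkConfig


def write_mcps_to_file(
    filename: str, mcps: List[MetadataChangeProposalWrapper]
) -> None:
    # Serialize metadata change proposals to a JSON file that
    # ingest_file_via_rest() can later push to the REST API.
    sink = FileSink(
        ctx=PipelineContext(run_id="create_test_data"),
        config=FileSinkConfig(filename=filename),
    )
    for mcp in mcps:
        sink.write_record_async(
            record_envelope=RecordEnvelope(record=mcp, metadata={}),
            write_callback=NoopWriteCallback(),
        )
    sink.close()

Funneling both tests through one function would also keep the two copies from drifting apart as the sink API evolves.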