Skip to content

Commit

Permalink
Merge branch 'datahub-project:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
anshbansal authored Jan 29, 2025
2 parents 454b5f0 + cc0d43f commit 493ac76
Show file tree
Hide file tree
Showing 38 changed files with 1,488 additions and 182 deletions.
14 changes: 7 additions & 7 deletions .github/workflows/docker-unified.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ jobs:
tag: ${{ steps.tag.outputs.tag }}
slim_tag: ${{ steps.tag.outputs.slim_tag }}
full_tag: ${{ steps.tag.outputs.full_tag }}
short_sha: ${{ steps.tag.outputs.short_sha }} # needed for auto-deploy
unique_tag: ${{ steps.tag.outputs.unique_tag }}
unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
Expand All @@ -64,6 +65,8 @@ jobs:
postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
integrations_service_change: "false"
datahub_executor_change: "false"
steps:
- name: Check out the repo
uses: acryldata/sane-checkout-action@v3
Expand Down Expand Up @@ -863,7 +866,8 @@ jobs:
context: .
file: ./docker/datahub-ingestion/Dockerfile
platforms: linux/amd64,linux/arm64/v8
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
# Workaround 2025-01-25 - Depot publishing errors
depot-project: ${{ (startsWith(github.ref, 'refs/tags/') || github.event_name == 'release') && '' || vars.DEPOT_PROJECT_ID }}
- name: Compute Tag
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_slim_tag || 'head-slim' }}" >> "$GITHUB_OUTPUT"
Expand Down Expand Up @@ -962,7 +966,8 @@ jobs:
context: .
file: ./docker/datahub-ingestion/Dockerfile
platforms: linux/amd64,linux/arm64/v8
depot-project: ${{ vars.DEPOT_PROJECT_ID }}
# Workaround 2025-01-25 - Depot publishing errors
depot-project: ${{ (startsWith(github.ref, 'refs/tags/') || github.event_name == 'release') && '' || vars.DEPOT_PROJECT_ID }}
- name: Compute Tag (Full)
id: tag
run: echo "tag=${{ needs.setup.outputs.ingestion_change == 'true' && needs.setup.outputs.unique_tag || 'head' }}" >> "$GITHUB_OUTPUT"
Expand Down Expand Up @@ -1177,11 +1182,6 @@ jobs:
docker pull '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head'
docker tag '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:head' '${{ env.DATAHUB_ELASTIC_SETUP_IMAGE }}:${{ needs.setup.outputs.unique_tag }}'
fi
if [ '${{ needs.setup.outputs.integrations_service_change }}' == 'false' ]; then
echo 'datahub-integration-service head images'
docker pull '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head'
docker tag '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:head' '${{ env.DATAHUB_INTEGRATIONS_IMAGE }}:${{ needs.setup.outputs.unique_tag }}'
fi
- name: CI Slim Head Images
run: |
if [ '${{ needs.setup.outputs.ingestion_change }}' == 'false' ]; then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.generated.MLModelGroupProperties;
import com.linkedin.datahub.graphql.generated.MLModelLineageInfo;
import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper;
import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper;
import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

Expand Down Expand Up @@ -33,10 +36,40 @@ public MLModelGroupProperties apply(
result.setVersion(VersionTagMapper.map(context, mlModelGroupProperties.getVersion()));
}
result.setCreatedAt(mlModelGroupProperties.getCreatedAt());
if (mlModelGroupProperties.hasCreated()) {
result.setCreated(
TimeStampToAuditStampMapper.map(context, mlModelGroupProperties.getCreated()));
}
if (mlModelGroupProperties.getName() != null) {
result.setName(mlModelGroupProperties.getName());
} else {
// backfill name from URN for backwards compatibility
result.setName(entityUrn.getEntityKey().get(1)); // indexed access is safe here
}

if (mlModelGroupProperties.hasLastModified()) {
result.setLastModified(
TimeStampToAuditStampMapper.map(context, mlModelGroupProperties.getLastModified()));
}

result.setCustomProperties(
CustomPropertiesMapper.map(mlModelGroupProperties.getCustomProperties(), entityUrn));

final MLModelLineageInfo lineageInfo = new MLModelLineageInfo();
if (mlModelGroupProperties.hasTrainingJobs()) {
lineageInfo.setTrainingJobs(
mlModelGroupProperties.getTrainingJobs().stream()
.map(urn -> urn.toString())
.collect(Collectors.toList()));
}
if (mlModelGroupProperties.hasDownstreamJobs()) {
lineageInfo.setDownstreamJobs(
mlModelGroupProperties.getDownstreamJobs().stream()
.map(urn -> urn.toString())
.collect(Collectors.toList()));
}
result.setMlModelLineageInfo(lineageInfo);

return result;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.linkedin.common.urn.Urn;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.generated.MLModelGroup;
import com.linkedin.datahub.graphql.generated.MLModelLineageInfo;
import com.linkedin.datahub.graphql.generated.MLModelProperties;
import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper;
import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper;
Expand Down Expand Up @@ -87,6 +88,20 @@ public MLModelProperties apply(
.collect(Collectors.toList()));
}
result.setTags(mlModelProperties.getTags());
final MLModelLineageInfo lineageInfo = new MLModelLineageInfo();
if (mlModelProperties.hasTrainingJobs()) {
lineageInfo.setTrainingJobs(
mlModelProperties.getTrainingJobs().stream()
.map(urn -> urn.toString())
.collect(Collectors.toList()));
}
if (mlModelProperties.hasDownstreamJobs()) {
lineageInfo.setDownstreamJobs(
mlModelProperties.getDownstreamJobs().stream()
.map(urn -> urn.toString())
.collect(Collectors.toList()));
}
result.setMlModelLineageInfo(lineageInfo);

return result;
}
Expand Down
29 changes: 29 additions & 0 deletions datahub-graphql-core/src/main/resources/lineage.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,32 @@ input LineageEdge {
"""
upstreamUrn: String!
}

# Lineage container type; it is attached to MLModelProperties and
# MLModelGroupProperties through the `extend type` declarations in this file.
"""
Represents lineage information for ML entities.
"""
type MLModelLineageInfo {
# Nullable list; every entry that is present is a non-null String.
# NOTE(review): entries are presumably stringified job URNs - confirm against the mappers.
"""
List of jobs or processes used to train the model.
"""
trainingJobs: [String!]

# Nullable list; every entry that is present is a non-null String.
# NOTE(review): entries are presumably stringified job URNs - confirm against the mappers.
"""
List of jobs or processes that use this model.
"""
downstreamJobs: [String!]
}

extend type MLModelProperties {
  """
  Information related to the lineage of this model
  """
  mlModelLineageInfo: MLModelLineageInfo
}

# Adds a nullable mlModelLineageInfo field to the existing MLModelGroupProperties type.
extend type MLModelGroupProperties {
"""
Information related to lineage to this model group
"""
mlModelLineageInfo: MLModelLineageInfo
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
package com.linkedin.datahub.graphql.types.mlmodel.mappers;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;

import com.linkedin.common.urn.Urn;
import com.linkedin.ml.metadata.MLModelGroupProperties;
import java.net.URISyntaxException;
import org.testng.annotations.Test;

/**
 * Unit tests for {@link MLModelGroupPropertiesMapper}.
 *
 * <p>Both tests map a backend {@code MLModelGroupProperties} aspect that carries no training or
 * downstream jobs, so the mapped lineage info object is expected to exist with both job lists
 * left unset.
 */
public class MLModelGroupPropertiesMapperTest {

  /** Name component (second key part) of {@link #GROUP_URN}. */
  private static final String GROUP_NAME_IN_URN = "another-group";

  /** URN string shared by every test in this class. */
  private static final String GROUP_URN =
      "urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker," + GROUP_NAME_IN_URN + ",PROD)";

  @Test
  public void testMapMLModelGroupProperties() throws URISyntaxException {
    // Backend aspect with an explicit description and name.
    MLModelGroupProperties input = new MLModelGroupProperties();
    input.setDescription("a ml trust model group");
    input.setName("ML trust model group");

    Urn groupUrn = Urn.createFromString(GROUP_URN);

    com.linkedin.datahub.graphql.generated.MLModelGroupProperties result =
        MLModelGroupPropertiesMapper.map(null, input, groupUrn);

    // Explicitly set fields are carried over unchanged.
    assertNotNull(result);
    assertEquals(result.getDescription(), "a ml trust model group");
    assertEquals(result.getName(), "ML trust model group");

    // The lineage info object is always populated; its job lists stay null because the
    // input declares neither training nor downstream jobs.
    assertNotNull(result.getMlModelLineageInfo());
    assertNull(result.getMlModelLineageInfo().getTrainingJobs());
    assertNull(result.getMlModelLineageInfo().getDownstreamJobs());
  }

  @Test
  public void testMapWithMinimalProperties() throws URISyntaxException {
    // Backend aspect with no fields set at all.
    MLModelGroupProperties input = new MLModelGroupProperties();

    Urn groupUrn = Urn.createFromString(GROUP_URN);

    com.linkedin.datahub.graphql.generated.MLModelGroupProperties result =
        MLModelGroupPropertiesMapper.map(null, input, groupUrn);

    assertNotNull(result);
    assertNull(result.getDescription());

    // With no explicit name, the mapper backfills the name from the URN's second key part.
    assertEquals(result.getName(), GROUP_NAME_IN_URN);

    // The lineage info object is always populated; its job lists stay null because the
    // input declares neither training nor downstream jobs.
    assertNotNull(result.getMlModelLineageInfo());
    assertNull(result.getMlModelLineageInfo().getTrainingJobs());
    assertNull(result.getMlModelLineageInfo().getDownstreamJobs());
  }
}
Loading

0 comments on commit 493ac76

Please sign in to comment.