Skip to content

Commit

Permalink
feat(vdp): upload raw inputs for run log (#904)
Browse files Browse the repository at this point in the history
Because

- currently, the run log inputs are stored as raw data rather than in blob storage
- we want the inputs to be download URLs rather than data URIs

This commit

- move upload pipeline run inputs from worker to service
  - remove the worker upload inputs function
  - add the service upload inputs function
  • Loading branch information
chuang8511 authored Dec 5, 2024
1 parent b9298e5 commit 960f4c2
Show file tree
Hide file tree
Showing 15 changed files with 527 additions and 281 deletions.
19 changes: 10 additions & 9 deletions cmd/main/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,14 +298,16 @@ func main() {
InstillCoreHost: config.Config.Server.InstillCoreHost,
ComponentStore: compStore,
}),
MgmtPublicServiceClient: mgmtPublicServiceClient,
MgmtPrivateServiceClient: mgmtPrivateServiceClient,
MinioClient: minioClient,
ComponentStore: compStore,
Memory: ms,
WorkerUID: workerUID,
RetentionHandler: nil,
BinaryFetcher: binaryFetcher,
MgmtPublicServiceClient: mgmtPublicServiceClient,
MgmtPrivateServiceClient: mgmtPrivateServiceClient,
MinioClient: minioClient,
ComponentStore: compStore,
Memory: ms,
WorkerUID: workerUID,
RetentionHandler: nil,
BinaryFetcher: binaryFetcher,
ArtifactPublicServiceClient: artifactPublicServiceClient,
ArtifactPrivateServiceClient: artifactPrivateServiceClient,
},
)

Expand Down Expand Up @@ -511,7 +513,6 @@ func main() {
lw.RegisterActivity(cw.UpdatePipelineRunActivity)
lw.RegisterActivity(cw.UpsertComponentRunActivity)

mw.RegisterActivity(cw.UploadInputsToMinioActivity)
mw.RegisterActivity(cw.UploadOutputsToMinioActivity)
mw.RegisterActivity(cw.UploadRecipeToMinioActivity)
mw.RegisterActivity(cw.UploadComponentInputsActivity)
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ require (
github.com/influxdata/influxdb-client-go/v2 v2.12.3
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20241129105617-c2c298e76498
github.com/instill-ai/usage-client v0.2.4-alpha.0.20240123081026-6c78d9a5197a
github.com/instill-ai/x v0.5.0-alpha.0.20241119141833-e4a78ca87792
github.com/instill-ai/x v0.5.0-alpha.0.20241203110942-cee5c110cba8
github.com/itchyny/gojq v0.12.14
github.com/jackc/pgx/v5 v5.5.5
github.com/jmoiron/sqlx v1.4.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1285,8 +1285,8 @@ github.com/instill-ai/protogen-go v0.3.3-alpha.0.20241129105617-c2c298e76498 h1:
github.com/instill-ai/protogen-go v0.3.3-alpha.0.20241129105617-c2c298e76498/go.mod h1:rf0UY7VpEgpaLudYEcjx5rnbuwlBaaLyD4FQmWLtgAY=
github.com/instill-ai/usage-client v0.2.4-alpha.0.20240123081026-6c78d9a5197a h1:gmy8BcCFDZQan40c/D3f62DwTYtlCwi0VrSax+pKffw=
github.com/instill-ai/usage-client v0.2.4-alpha.0.20240123081026-6c78d9a5197a/go.mod h1:EpX3Yr661uWULtZf5UnJHfr5rw2PDyX8ku4Kx0UtYFw=
github.com/instill-ai/x v0.5.0-alpha.0.20241119141833-e4a78ca87792 h1:b4lhXcFJ/kGGC1RErtItoI57paf9WXBCVpaPIAApldY=
github.com/instill-ai/x v0.5.0-alpha.0.20241119141833-e4a78ca87792/go.mod h1:jkVtaq9T2zAFA5N46tlV4K5EEVE7FcOVNbqY4wFWYz8=
github.com/instill-ai/x v0.5.0-alpha.0.20241203110942-cee5c110cba8 h1:w2F6sI6VbzIXUIh6HrSrV4k43pM/brj1jv6HT994+so=
github.com/instill-ai/x v0.5.0-alpha.0.20241203110942-cee5c110cba8/go.mod h1:jkVtaq9T2zAFA5N46tlV4K5EEVE7FcOVNbqY4wFWYz8=
github.com/intel/goresctrl v0.2.0/go.mod h1:+CZdzouYFn5EsxgqAQTEzMfwKwuc0fVdMrT9FCCAVRQ=
github.com/itchyny/gojq v0.12.14 h1:6k8vVtsrhQSYgSGg827AD+PVVaB1NLXEdX+dda2oZCc=
github.com/itchyny/gojq v0.12.14/go.mod h1:y1G7oO7XkcR1LPZO59KyoCRy08T3j9vDYRV0GgYSS+s=
Expand Down
21 changes: 15 additions & 6 deletions pkg/component/internal/util/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,24 @@ func GetFileTypeByFilename(filename string) (string, error) {

func GetContentTypeFromBase64(base64String string) (string, error) {
// Remove the "data:" prefix and split at the first semicolon
contentType := strings.TrimPrefix(base64String, "data:")
if hasDataPrefix(base64String) {
contentType := strings.TrimPrefix(base64String, "data:")

parts := strings.SplitN(contentType, ";", 2)
if len(parts) != 2 {
return "", fmt.Errorf("invalid format")
parts := strings.SplitN(contentType, ";", 2)
if len(parts) != 2 {
return "", fmt.Errorf("invalid format")
}

// The first part is the content type
return parts[0], nil
}

// The first part is the content type
return parts[0], nil
b, err := base64.StdEncoding.DecodeString(base64String)
if err != nil {
return "", fmt.Errorf("decode base64 string: %w", err)
}
mimeType := strings.Split(mimetype.Detect(b).String(), ";")[0]
return mimeType, nil
}

func GetFileBase64Content(base64String string) string {
Expand Down
116 changes: 116 additions & 0 deletions pkg/service/blobstorage.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package service

import (
"context"
"encoding/base64"
"fmt"
"mime"
"strings"
"time"

"github.com/gabriel-vasile/mimetype"
"google.golang.org/grpc/metadata"

"github.com/instill-ai/pipeline-backend/pkg/recipe"
"github.com/instill-ai/pipeline-backend/pkg/resource"
"github.com/instill-ai/pipeline-backend/pkg/utils"

artifactpb "github.com/instill-ai/protogen-go/artifact/artifact/v1alpha"
resourcex "github.com/instill-ai/x/resource"
)

// uploadBlobAndGetDownloadURL uploads data through the artifact public
// service and returns the download URL of the resulting object.
//
// data is either a data URL ("data:<mediatype>;base64,<payload>") or a bare
// base64 string. In both cases the payload is base64-decoded and the decoded
// bytes are what gets uploaded. The object is requested in namespace ns.NsID.
//
// An error is returned when the MIME type cannot be determined, the payload
// is not valid base64, the system variables cannot be generated, or any of
// the artifact/upload calls fails.
func (s *service) uploadBlobAndGetDownloadURL(ctx context.Context, ns resource.Namespace, data string) (string, error) {
	mimeType, err := getMimeType(data)
	if err != nil {
		return "", fmt.Errorf("get mime type: %w", err)
	}

	// Decode the payload before talking to the artifact service so that
	// malformed input fails fast instead of first requesting an upload URL
	// for an object that will never be uploaded.
	b, err := base64.StdEncoding.DecodeString(removePrefix(data))
	if err != nil {
		return "", fmt.Errorf("decode base64 string: %w", err)
	}

	artifactClient := s.artifactPublicServiceClient
	// Only the requester UID is used (for the object name); the second
	// return value is intentionally discarded.
	requesterUID, _ := resourcex.GetRequesterUIDAndUserUID(ctx)

	vars, err := recipe.GenerateSystemVariables(ctx, recipe.SystemVariables{})
	if err != nil {
		return "", fmt.Errorf("generate system variables: %w", err)
	}

	// Forward the request metadata derived from the system variables on the
	// outgoing calls below.
	ctx = metadata.NewOutgoingContext(ctx, utils.GetRequestMetadata(vars))

	// NOTE(review): time.RFC3339 has one-second resolution, so two uploads by
	// the same requester within the same second get the same object name —
	// confirm the artifact service tolerates duplicate names.
	timestamp := time.Now().Format(time.RFC3339)
	objectName := fmt.Sprintf("%s-%s%s", requesterUID.String(), timestamp, getFileExtension(mimeType))

	// TODO: We will need to add the expiry days for the blob data.
	// This will be addressed in ins-6857
	resp, err := artifactClient.GetObjectUploadURL(ctx, &artifactpb.GetObjectUploadURLRequest{
		NamespaceId:      ns.NsID,
		ObjectName:       objectName,
		ObjectExpireDays: 0,
	})
	if err != nil {
		return "", fmt.Errorf("get upload url: %w", err)
	}

	uploadURL := resp.GetUploadUrl()
	if err := utils.UploadBlobData(ctx, uploadURL, mimeType, b, s.log); err != nil {
		return "", fmt.Errorf("upload blob data: %w", err)
	}

	respDownloadURL, err := artifactClient.GetObjectDownloadURL(ctx, &artifactpb.GetObjectDownloadURLRequest{
		NamespaceId: ns.NsID,
		ObjectUid:   resp.GetObject().GetUid(),
	})
	if err != nil {
		return "", fmt.Errorf("get object download url: %w", err)
	}

	return respDownloadURL.GetDownloadUrl(), nil
}

// getMimeType returns the MIME type of data, which is either a data URL
// ("data:<mediatype>[;params],<payload>") or a bare base64 string.
//
// For a data URL the type is read from the header, i.e. the text between
// "data:" and the first comma, with any ";"-separated parameters (such as
// ";base64" or ";charset=...") dropped. A data URL whose media type is
// omitted yields an empty string. A data URL without a comma is rejected as
// invalid.
//
// For a bare base64 string the content is decoded and the type is sniffed
// from the decoded bytes; parameters reported by the detector are dropped.
func getMimeType(data string) (string, error) {
	if strings.HasPrefix(data, "data:") {
		rest := strings.TrimPrefix(data, "data:")

		// Restrict parsing to the header so that a URL without parameters
		// (e.g. "data:text/plain,hello") does not leak the payload into the
		// returned MIME type.
		comma := strings.Index(rest, ",")
		if comma < 0 {
			return "", fmt.Errorf("invalid data url")
		}
		header := rest[:comma]

		// SplitN always returns at least one element for a non-empty
		// separator, so indexing [0] is safe.
		return strings.SplitN(header, ";", 2)[0], nil
	}

	b, err := base64.StdEncoding.DecodeString(data)
	if err != nil {
		return "", fmt.Errorf("decode base64 string: %w", err)
	}
	return strings.Split(mimetype.Detect(b).String(), ";")[0], nil
}

// getFileExtension maps mimeType to a file extension (including the leading
// dot) using the standard library's MIME table. It returns the first
// extension reported for the type, or an empty string when the type cannot
// be parsed or has no known extension.
func getFileExtension(mimeType string) string {
	candidates, lookupErr := mime.ExtensionsByType(mimeType)
	if lookupErr != nil || len(candidates) == 0 {
		return ""
	}
	return candidates[0]
}

// removePrefix strips the data-URL header ("data:<mediatype>[;base64],")
// from data and returns only the payload after the first comma. Input that
// does not start with "data:" is returned unchanged. A data URL without a
// comma carries no payload, so an empty string is returned.
func removePrefix(data string) string {
	if !strings.HasPrefix(data, "data:") {
		return data
	}
	parts := strings.SplitN(data, ",", 2)
	// SplitN returns a single element when there is no comma; guard on
	// fewer than two parts so parts[1] cannot go out of range.
	if len(parts) < 2 {
		return ""
	}
	return parts[1]
}
89 changes: 48 additions & 41 deletions pkg/service/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/instill-ai/pipeline-backend/pkg/resource"

componentstore "github.com/instill-ai/pipeline-backend/pkg/component/store"
artifactpb "github.com/instill-ai/protogen-go/artifact/artifact/v1alpha"
mgmtpb "github.com/instill-ai/protogen-go/core/mgmt/v1beta"
pb "github.com/instill-ai/protogen-go/vdp/pipeline/v1beta"
miniox "github.com/instill-ai/x/minio"
Expand Down Expand Up @@ -98,37 +99,41 @@ type TriggerResult struct {
// However, we keep it here for now because we may need it in the future.
// service is the implementation of the Service interface
type service struct {
repository repository.Repository
redisClient *redis.Client
temporalClient client.Client
component *componentstore.Store
mgmtPublicServiceClient mgmtpb.MgmtPublicServiceClient
mgmtPrivateServiceClient mgmtpb.MgmtPrivateServiceClient
aclClient acl.ACLClientInterface
converter Converter
minioClient miniox.MinioI
memory memory.MemoryStore
log *zap.Logger
workerUID uuid.UUID
retentionHandler MetadataRetentionHandler
binaryFetcher external.BinaryFetcher
repository repository.Repository
redisClient *redis.Client
temporalClient client.Client
component *componentstore.Store
mgmtPublicServiceClient mgmtpb.MgmtPublicServiceClient
mgmtPrivateServiceClient mgmtpb.MgmtPrivateServiceClient
aclClient acl.ACLClientInterface
converter Converter
minioClient miniox.MinioI
memory memory.MemoryStore
log *zap.Logger
workerUID uuid.UUID
retentionHandler MetadataRetentionHandler
binaryFetcher external.BinaryFetcher
artifactPublicServiceClient artifactpb.ArtifactPublicServiceClient
artifactPrivateServiceClient artifactpb.ArtifactPrivateServiceClient
}

// ServiceConfig is the configuration for the service
type ServiceConfig struct {
Repository repository.Repository
RedisClient *redis.Client
TemporalClient client.Client
ACLClient acl.ACLClientInterface
Converter Converter
MgmtPublicServiceClient mgmtpb.MgmtPublicServiceClient
MgmtPrivateServiceClient mgmtpb.MgmtPrivateServiceClient
MinioClient miniox.MinioI
ComponentStore *componentstore.Store
Memory memory.MemoryStore
WorkerUID uuid.UUID
RetentionHandler MetadataRetentionHandler
BinaryFetcher external.BinaryFetcher
Repository repository.Repository
RedisClient *redis.Client
TemporalClient client.Client
ACLClient acl.ACLClientInterface
Converter Converter
MgmtPublicServiceClient mgmtpb.MgmtPublicServiceClient
MgmtPrivateServiceClient mgmtpb.MgmtPrivateServiceClient
MinioClient miniox.MinioI
ComponentStore *componentstore.Store
Memory memory.MemoryStore
WorkerUID uuid.UUID
RetentionHandler MetadataRetentionHandler
BinaryFetcher external.BinaryFetcher
ArtifactPublicServiceClient artifactpb.ArtifactPublicServiceClient
ArtifactPrivateServiceClient artifactpb.ArtifactPrivateServiceClient
}

// NewService initiates a service instance
Expand All @@ -141,19 +146,21 @@ func NewService(
}

return &service{
repository: cfg.Repository,
redisClient: cfg.RedisClient,
temporalClient: cfg.TemporalClient,
mgmtPublicServiceClient: cfg.MgmtPublicServiceClient,
mgmtPrivateServiceClient: cfg.MgmtPrivateServiceClient,
component: cfg.ComponentStore,
aclClient: cfg.ACLClient,
converter: cfg.Converter,
minioClient: cfg.MinioClient,
memory: cfg.Memory,
log: zapLogger,
workerUID: cfg.WorkerUID,
retentionHandler: cfg.RetentionHandler,
binaryFetcher: cfg.BinaryFetcher,
repository: cfg.Repository,
redisClient: cfg.RedisClient,
temporalClient: cfg.TemporalClient,
mgmtPublicServiceClient: cfg.MgmtPublicServiceClient,
mgmtPrivateServiceClient: cfg.MgmtPrivateServiceClient,
component: cfg.ComponentStore,
aclClient: cfg.ACLClient,
converter: cfg.Converter,
minioClient: cfg.MinioClient,
memory: cfg.Memory,
log: zapLogger,
workerUID: cfg.WorkerUID,
retentionHandler: cfg.RetentionHandler,
binaryFetcher: cfg.BinaryFetcher,
artifactPublicServiceClient: cfg.ArtifactPublicServiceClient,
artifactPrivateServiceClient: cfg.ArtifactPrivateServiceClient,
}
}
Loading

0 comments on commit 960f4c2

Please sign in to comment.