Skip to content

Commit

Permalink
Add preprocessing child workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
jraddaoui committed Apr 18, 2024
1 parent f657aaf commit 1009691
Show file tree
Hide file tree
Showing 10 changed files with 392 additions and 21 deletions.
25 changes: 24 additions & 1 deletion Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,31 @@ KUBE_OVERLAY = 'hack/kube/overlays/dev-a3m'
if PRES_SYS == 'am':
KUBE_OVERLAY = 'hack/kube/overlays/dev-am'

# Load Kustomize YAML
yaml = kustomize(KUBE_OVERLAY)

# Preprocessing
PREPROCESSING_PATH = os.environ.get("PREPROCESSING_PATH", "")
if PREPROCESSING_PATH != "":
# Load preprocessing Tiltfile for Enduro
load_dynamic(PREPROCESSING_PATH + "/Tiltfile.enduro")
# Get Enduro a3m/am worker k8s manifest
if PRES_SYS == "a3m":
pres_yaml, yaml = filter_yaml(yaml, name="^enduro-a3m$", kind="StatefulSet")
else:
pres_yaml, yaml = filter_yaml(yaml, name="^enduro-am$", kind="Deployment")
# Append preprocessing volume and volume mount to worker container,
# this will only work in single node k8s cluster deployments
volume = {"name": "shared-dir", "persistentVolumeClaim": {"claimName": "preprocessing-pvc"}}
volume_mount = {"name": "shared-dir", "mountPath": "/tmp"}
pres_obj = decode_yaml(pres_yaml)
pres_obj["spec"]["template"]["spec"]["volumes"].append(volume)
pres_obj["spec"]["template"]["spec"]["containers"][0]["volumeMounts"].append(volume_mount)
pres_yaml = encode_yaml(pres_obj)
yaml = [yaml, pres_yaml]

# Load Kubernetes resources
k8s_yaml(kustomize(KUBE_OVERLAY))
k8s_yaml(yaml)

# Configure trigger mode
trigger_mode = TRIGGER_MODE_MANUAL
Expand Down
1 change: 1 addition & 0 deletions docs/src/dev-manual/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ This is the developer manual for Enduro SDPS.
- [Dependency management](deps.md)
- [Environment setup](devel.md)
- [Working with Archivematica](archivematica.md)
- [Preprocessing child workflow](preprocessing.md)
- [Logging](logging.md)
- [Makefile](make.md)
- [Testing](testing.md)
9 changes: 9 additions & 0 deletions docs/src/dev-manual/devel.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,14 @@ are planning to use Archivematica as preservation system.
Build and use a local version of a3m. Requires to have the `a3m` repository
cloned as a sibling of this repository folder.

### PREPROCESSING_PATH

Relative path to a preprocessing child workflow repository. It loads a Tiltfile
called `Tiltfile.enduro` from that repository and mounts a presistent volume
claim (PVC) in the preservation system pod. That PVC must be defined in the
preprocessing and be called `preprocessing-pvc`. Check the [Preprocessing child
workflow] docs to configure the child workflow execution.

## Tilt UI helpers

### Upload to Minio
Expand Down Expand Up @@ -259,3 +267,4 @@ is sometimes not setup properly. To solve it, from the Tilt UI, restart the
[visual studio code]: https://code.visualstudio.com/
[working with archivematica]: archivematica.md
[devbox]: https://www.jetpack.io/devbox/docs/quickstart/#install-devbox
[preprocessing child workflow]: preprocessing.md
34 changes: 34 additions & 0 deletions docs/src/dev-manual/preprocessing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Preprocessing child workflow

The processing workflow can be extended with the execution of a preprocessing
child workflow.

## Configuration

### `.tilt.env`

Check the [Tilt environment configuration].

### `enduro.toml`

```toml
# Optional preprocessing child workflow configuration.
[preprocessing]
# enabled triggers the execution of the child workflow, when set to false all other
# options are ignored.
enabled = false
# extract determines if the package extraction happens on the child workflow.
extract = true
# sharedPath is the full path to the directory used to share the package between workflows,
# required when enabled is set to true.
sharedPath = "/tmp"

# Temporal configuration to trigger the preprocessing child workflow, all fields are
# required when enabled is set to true.
[preprocessing.temporal]
namespace = "default"
taskQueue = "preprocessing"
workflowName = "preprocessing"
```

[tilt environment configuration]: devel.md#preprocessing_path
18 changes: 18 additions & 0 deletions enduro.toml
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,21 @@ bucket = "sips"
enabled = false
address = ""
samplingRatio = 1.0

# Optional preprocessing child workflow configuration.
[preprocessing]
# enabled triggers the execution of the child workflow, when set to false all other
# options are ignored.
enabled = false
# extract determines if the package extraction happens on the child workflow.
extract = true
# sharedPath is the full path to the directory used to share the package between workflows,
# required when enabled is set to true.
sharedPath = "/tmp"

# Temporal configuration to trigger the preprocessing child workflow, all fields are
# required when enabled is set to true.
[preprocessing.temporal]
namespace = "default"
taskQueue = "preprocessing"
workflowName = "preprocessing"
30 changes: 19 additions & 11 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/artefactual-sdps/enduro/internal/api"
"github.com/artefactual-sdps/enduro/internal/db"
"github.com/artefactual-sdps/enduro/internal/event"
"github.com/artefactual-sdps/enduro/internal/preprocessing"
"github.com/artefactual-sdps/enduro/internal/pres"
"github.com/artefactual-sdps/enduro/internal/storage"
"github.com/artefactual-sdps/enduro/internal/telemetry"
Expand All @@ -30,17 +31,18 @@ type Configuration struct {
Debug bool
DebugListen string

A3m a3m.Config
AM am.Config
API api.Config
Database db.Config
Event event.Config
Preservation pres.Config
Storage storage.Config
Temporal temporal.Config
Upload upload.Config
Watcher watcher.Config
Telemetry telemetry.Config
A3m a3m.Config
AM am.Config
API api.Config
Database db.Config
Event event.Config
Preservation pres.Config
Storage storage.Config
Temporal temporal.Config
Upload upload.Config
Watcher watcher.Config
Telemetry telemetry.Config
Preprocessing preprocessing.Config
}

func (c Configuration) Validate() error {
Expand All @@ -57,6 +59,12 @@ func (c Configuration) Validate() error {
return err
}
}
if config, ok := interface{}(c.Preprocessing).(ConfigurationValidator); ok {
err := config.Validate()
if err != nil {
return err

Check warning on line 65 in internal/config/config.go

View check run for this annotation

Codecov / codecov/patch

internal/config/config.go#L65

Added line #L65 was not covered by tests
}
}
return nil
}

Expand Down
44 changes: 44 additions & 0 deletions internal/preprocessing/preprocessing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package preprocessing

import "errors"

type Config struct {
// Enable preprocessing child workflow.
Enabled bool
// Extract package in preprocessing.
Extract bool
// Local path shared between workers.
SharedPath string
// Temporal configuration.
Temporal Temporal
}

type Temporal struct {
Namespace string
TaskQueue string
WorkflowName string
}

type WorkflowParams struct {
// Relative path to the shared path.
RelativePath string
}

type WorkflowResult struct {
// Relative path to the shared path.
RelativePath string
}

// Validate implements config.ConfigurationValidator.
func (c Config) Validate() error {
if !c.Enabled {
return nil
}
if c.SharedPath == "" {
return errors.New("sharedPath is required in the [preprocessing] configuration")
}
if c.Temporal.Namespace == "" || c.Temporal.TaskQueue == "" || c.Temporal.WorkflowName == "" {
return errors.New("namespace, taskQueue and workflowName are required in the [preprocessing.temporal] configuration")
}
return nil
}
65 changes: 65 additions & 0 deletions internal/preprocessing/preprocessing_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package preprocessing_test

import (
"testing"

"gotest.tools/v3/assert"

"github.com/artefactual-sdps/enduro/internal/preprocessing"
)

func TestPreprocessingConfig(t *testing.T) {
t.Parallel()

type test struct {
name string
config preprocessing.Config
wantErr string
}
for _, tt := range []test{
{
name: "Validates if not enabled",
config: preprocessing.Config{
Enabled: false,
},
},
{
name: "Validates with all required fields",
config: preprocessing.Config{
Enabled: true,
SharedPath: "/tmp",
Temporal: preprocessing.Temporal{
Namespace: "default",
TaskQueue: "preprocessing",
WorkflowName: "preprocessing",
},
},
},
{
name: "Returns error if shared path is missing",
config: preprocessing.Config{
Enabled: true,
},
wantErr: "sharedPath is required in the [preprocessing] configuration",
},
{
name: "Returns error if temporal config is missing",
config: preprocessing.Config{
Enabled: true,
SharedPath: "/tmp",
},
wantErr: "namespace, taskQueue and workflowName are required in the [preprocessing.temporal] configuration",
},
} {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()

err := tt.config.Validate()
if tt.wantErr != "" {
assert.Error(t, err, tt.wantErr)
return
}
assert.NilError(t, err)
})
}
}
46 changes: 44 additions & 2 deletions internal/workflow/processing.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ package workflow
import (
"errors"
"fmt"
"path/filepath"
"time"

"github.com/go-logr/logr"
"github.com/google/uuid"
"go.artefactual.dev/tools/ref"
temporalapi_enums "go.temporal.io/api/enums/v1"
temporalsdk_temporal "go.temporal.io/sdk/temporal"
temporalsdk_workflow "go.temporal.io/sdk/workflow"

Expand All @@ -22,6 +24,7 @@ import (
"github.com/artefactual-sdps/enduro/internal/enums"
"github.com/artefactual-sdps/enduro/internal/fsutil"
"github.com/artefactual-sdps/enduro/internal/package_"
"github.com/artefactual-sdps/enduro/internal/preprocessing"
"github.com/artefactual-sdps/enduro/internal/temporal"
"github.com/artefactual-sdps/enduro/internal/watcher"
"github.com/artefactual-sdps/enduro/internal/workflow/activities"
Expand Down Expand Up @@ -320,8 +323,8 @@ func (w *ProcessingWorkflow) SessionHandler(
tinfo.TempPath = downloadResult.Path
}

// Unarchive the transfer if it's not a directory.
if !tinfo.req.IsDir {
// Unarchive the transfer if it's not a directory and it's not part of the preprocessing child workflow.
if !tinfo.req.IsDir && (!w.cfg.Preprocessing.Enabled || !w.cfg.Preprocessing.Extract) {
activityOpts := withActivityOptsForLocalAction(sessCtx)
var result activities.UnarchiveActivityResult
err := temporalsdk_workflow.ExecuteActivity(
Expand All @@ -339,6 +342,11 @@ func (w *ProcessingWorkflow) SessionHandler(
tinfo.req.IsDir = result.IsDir
}

// Preprocessing child workflow.
if err := w.preprocessing(sessCtx, tinfo); err != nil {
return err

Check warning on line 347 in internal/workflow/processing.go

View check run for this annotation

Codecov / codecov/patch

internal/workflow/processing.go#L347

Added line #L347 was not covered by tests
}

// Bundle.
{
// For the a3m workflow bundle the transfer to a directory shared with
Expand Down Expand Up @@ -799,3 +807,37 @@ func (w *ProcessingWorkflow) transferAM(sessCtx temporalsdk_workflow.Context, ti

return nil
}

func (w *ProcessingWorkflow) preprocessing(ctx temporalsdk_workflow.Context, tinfo *TransferInfo) error {
if !w.cfg.Preprocessing.Enabled {
return nil
}

// TODO: move transfer if tinfo.TempPath is not inside w.prepConfig.SharedPath and
// allow to configure the processing directory instead of using the default temp dir.
realPath, err := filepath.Rel(w.cfg.Preprocessing.SharedPath, tinfo.TempPath)
if err != nil {
return err

Check warning on line 820 in internal/workflow/processing.go

View check run for this annotation

Codecov / codecov/patch

internal/workflow/processing.go#L820

Added line #L820 was not covered by tests
}

preCtx := temporalsdk_workflow.WithChildOptions(ctx, temporalsdk_workflow.ChildWorkflowOptions{
Namespace: w.cfg.Preprocessing.Temporal.Namespace,
TaskQueue: w.cfg.Preprocessing.Temporal.TaskQueue,
WorkflowID: fmt.Sprintf("%s-%s", w.cfg.Preprocessing.Temporal.WorkflowName, uuid.New().String()),
ParentClosePolicy: temporalapi_enums.PARENT_CLOSE_POLICY_TERMINATE,
})
var result preprocessing.WorkflowResult
err = temporalsdk_workflow.ExecuteChildWorkflow(
preCtx,
w.cfg.Preprocessing.Temporal.WorkflowName,
preprocessing.WorkflowParams{RelativePath: realPath},
).Get(preCtx, &result)
if err != nil {
return err

Check warning on line 836 in internal/workflow/processing.go

View check run for this annotation

Codecov / codecov/patch

internal/workflow/processing.go#L836

Added line #L836 was not covered by tests
}

tinfo.TempPath = filepath.Join(w.cfg.Preprocessing.SharedPath, filepath.Clean(result.RelativePath))
tinfo.req.IsDir = true

return nil
}
Loading

0 comments on commit 1009691

Please sign in to comment.