Skip to content

Commit

Permalink
Add preprocessing child workflow
Browse files Browse the repository at this point in the history
Allows to configure and trigger a custom preprocessing child workflow
to be handled by a different worker. This initial implementation requires
both workers to have access to the same filesystem to share the package.
  • Loading branch information
jraddaoui committed Apr 20, 2024
1 parent f657aaf commit cddeb0c
Show file tree
Hide file tree
Showing 11 changed files with 412 additions and 26 deletions.
29 changes: 28 additions & 1 deletion Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,35 @@ KUBE_OVERLAY = 'hack/kube/overlays/dev-a3m'
if PRES_SYS == 'am':
KUBE_OVERLAY = 'hack/kube/overlays/dev-am'

# Load Kustomize YAML
yaml = kustomize(KUBE_OVERLAY)

# Preprocessing
#
# When PREPROCESSING_PATH is set, load the preprocessing Tiltfile and patch the
# Enduro preservation worker manifest so that it mounts the shared PVC.
PREPROCESSING_PATH = os.environ.get("PREPROCESSING_PATH", "")
if PREPROCESSING_PATH != "":
  # Load preprocessing Tiltfile for Enduro
  load_dynamic(PREPROCESSING_PATH + "/Tiltfile.enduro")
  # Split the Enduro a3m/am worker k8s manifest from the rest of the YAML
  if PRES_SYS == "a3m":
    pres_yaml, yaml = filter_yaml(yaml, name="^enduro-a3m$", kind="StatefulSet")
  else:
    pres_yaml, yaml = filter_yaml(yaml, name="^enduro-am$", kind="Deployment")
  # Append preprocessing volume and volume mount to worker container,
  # this will only work in single node k8s cluster deployments
  volume = {"name": "shared-dir", "persistentVolumeClaim": {"claimName": "preprocessing-pvc"}}
  volume_mount = {"name": "shared-dir", "mountPath": "/home/enduro/preprocessing"}
  pres_obj = decode_yaml(pres_yaml)
  pod_spec = pres_obj["spec"]["template"]["spec"]
  if "volumes" not in pod_spec:
    pod_spec["volumes"] = []
  pod_spec["volumes"].append(volume)
  for container in pod_spec["containers"]:
    if container["name"] in ["enduro-a3m-worker", "enduro-am-worker"]:
      # Same guard as for "volumes": a container that declares no mounts
      # has no "volumeMounts" key to append to.
      if "volumeMounts" not in container:
        container["volumeMounts"] = []
      container["volumeMounts"].append(volume_mount)
  # Put the patched worker manifest back next to the remaining resources
  pres_yaml = encode_yaml(pres_obj)
  yaml = [yaml, pres_yaml]

# Load Kubernetes resources
k8s_yaml(kustomize(KUBE_OVERLAY))
k8s_yaml(yaml)

# Configure trigger mode
trigger_mode = TRIGGER_MODE_MANUAL
Expand Down
1 change: 1 addition & 0 deletions docs/src/dev-manual/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ This is the developer manual for Enduro SDPS.
- [Dependency management](deps.md)
- [Environment setup](devel.md)
- [Working with Archivematica](archivematica.md)
- [Preprocessing child workflow](preprocessing.md)
- [Logging](logging.md)
- [Makefile](make.md)
- [Testing](testing.md)
9 changes: 9 additions & 0 deletions docs/src/dev-manual/devel.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,14 @@ are planning to use Archivematica as preservation system.
Build and use a local version of a3m. Requires the `a3m` repository to be
cloned as a sibling of this repository folder.

### PREPROCESSING_PATH

Relative path to a preprocessing child workflow repository. It loads a Tiltfile
called `Tiltfile.enduro` from that repository and mounts a persistent volume
claim (PVC) in the preservation system pod. That PVC must be defined in the
preprocessing repository and be called `preprocessing-pvc`. Check the
[Preprocessing child workflow] docs to configure the child workflow execution.

## Tilt UI helpers

### Upload to Minio
Expand Down Expand Up @@ -259,3 +267,4 @@ is sometimes not setup properly. To solve it, from the Tilt UI, restart the
[visual studio code]: https://code.visualstudio.com/
[working with archivematica]: archivematica.md
[devbox]: https://www.jetpack.io/devbox/docs/quickstart/#install-devbox
[preprocessing child workflow]: preprocessing.md
34 changes: 34 additions & 0 deletions docs/src/dev-manual/preprocessing.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Preprocessing child workflow

The processing workflow can be extended with the execution of a preprocessing
child workflow.

## Configuration

### `.tilt.env`

Check the [Tilt environment configuration].

### `enduro.toml`

```toml
# Optional preprocessing child workflow configuration.
[preprocessing]
# enabled triggers the execution of the child workflow. When set to false, all other
# options are ignored.
enabled = true
# extract determines if the package extraction happens on the child workflow.
extract = false
# sharedPath is the full path to the directory used to share the package between workflows,
# required when enabled is set to true.
sharedPath = "/home/enduro/preprocessing"

# Temporal configuration to trigger the preprocessing child workflow. All fields are
# required when enabled is set to true.
[preprocessing.temporal]
namespace = "default"
taskQueue = "preprocessing"
workflowName = "preprocessing"
```

[tilt environment configuration]: devel.md#preprocessing_path
18 changes: 18 additions & 0 deletions enduro.toml
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,21 @@ bucket = "sips"
enabled = false
address = ""
samplingRatio = 1.0

# Optional preprocessing child workflow configuration.
[preprocessing]
# enabled triggers the execution of the child workflow. When set to false, all other
# options are ignored.
enabled = false
# extract determines if the package extraction happens on the child workflow.
extract = false
# sharedPath is the full path to the directory used to share the package between workflows,
# required when enabled is set to true.
sharedPath = "/home/enduro/preprocessing"

# Temporal configuration to trigger the preprocessing child workflow. All fields are
# required when enabled is set to true.
[preprocessing.temporal]
namespace = "default"
taskQueue = "preprocessing"
workflowName = "preprocessing"
30 changes: 19 additions & 11 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/artefactual-sdps/enduro/internal/api"
"github.com/artefactual-sdps/enduro/internal/db"
"github.com/artefactual-sdps/enduro/internal/event"
"github.com/artefactual-sdps/enduro/internal/preprocessing"
"github.com/artefactual-sdps/enduro/internal/pres"
"github.com/artefactual-sdps/enduro/internal/storage"
"github.com/artefactual-sdps/enduro/internal/telemetry"
Expand All @@ -30,17 +31,18 @@ type Configuration struct {
Debug bool
DebugListen string

A3m a3m.Config
AM am.Config
API api.Config
Database db.Config
Event event.Config
Preservation pres.Config
Storage storage.Config
Temporal temporal.Config
Upload upload.Config
Watcher watcher.Config
Telemetry telemetry.Config
A3m a3m.Config
AM am.Config
API api.Config
Database db.Config
Event event.Config
Preservation pres.Config
Storage storage.Config
Temporal temporal.Config
Upload upload.Config
Watcher watcher.Config
Telemetry telemetry.Config
Preprocessing preprocessing.Config
}

func (c Configuration) Validate() error {
Expand All @@ -57,6 +59,12 @@ func (c Configuration) Validate() error {
return err
}
}
if config, ok := interface{}(c.Preprocessing).(ConfigurationValidator); ok {
err := config.Validate()
if err != nil {
return err

Check warning on line 65 in internal/config/config.go

View check run for this annotation

Codecov / codecov/patch

internal/config/config.go#L65

Added line #L65 was not covered by tests
}
}
return nil
}

Expand Down
46 changes: 46 additions & 0 deletions internal/preprocessing/preprocessing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package preprocessing

import "errors"

// Config holds the settings of the optional preprocessing child workflow.
// Validate only checks the other fields when Enabled is true.
type Config struct {
// Enabled turns on the preprocessing child workflow.
Enabled bool
// Extract indicates that the package is extracted in preprocessing.
Extract bool
// SharedPath is the local path shared between workers. Validate requires
// it to be non-empty when Enabled is true.
SharedPath string
// Temporal is the Temporal configuration of the child workflow. Validate
// requires all of its fields to be non-empty when Enabled is true.
Temporal Temporal
}

// Temporal is the Temporal configuration section of Config. Config.Validate
// requires every field to be non-empty when preprocessing is enabled.
type Temporal struct {
// Namespace is presumably the Temporal namespace of the child workflow
// (the sample configuration uses "default") — confirm against the caller.
Namespace string
// TaskQueue is presumably the task queue the child workflow is sent to.
TaskQueue string
// WorkflowName is presumably the registered name of the child workflow.
WorkflowName string
}

// WorkflowParams is presumably the input of the preprocessing child workflow;
// its use is not visible in this file — confirm against the workflow code.
type WorkflowParams struct {
// RelativePath is a path relative to the shared path (presumably
// Config.SharedPath — confirm).
RelativePath string
}

// WorkflowResult is presumably the output of the preprocessing child workflow;
// its use is not visible in this file — confirm against the workflow code.
type WorkflowResult struct {
// RelativePath is a path relative to the shared path (presumably
// Config.SharedPath — confirm).
RelativePath string
}

// Validate implements config.ConfigurationValidator.
//
// A disabled configuration is always valid. When Enabled is true, SharedPath
// and every field of Temporal must be non-empty; the first unmet requirement
// is returned as an error.
func (c Config) Validate() error {
	switch {
	case !c.Enabled:
		// Nothing else is checked while preprocessing is disabled.
		return nil
	case c.SharedPath == "":
		return errors.New("sharedPath is required in the [preprocessing] configuration")
	}

	required := []string{c.Temporal.Namespace, c.Temporal.TaskQueue, c.Temporal.WorkflowName}
	for _, value := range required {
		if value == "" {
			return errors.New(
				"namespace, taskQueue and workflowName are required in the [preprocessing.temporal] configuration",
			)
		}
	}

	return nil
}
65 changes: 65 additions & 0 deletions internal/preprocessing/preprocessing_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package preprocessing_test

import (
"testing"

"gotest.tools/v3/assert"

"github.com/artefactual-sdps/enduro/internal/preprocessing"
)

// TestPreprocessingConfig runs Config.Validate over a table of configurations
// and checks that the returned error matches the expected message, or that no
// error is returned when none is expected.
func TestPreprocessingConfig(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name    string
		config  preprocessing.Config
		wantErr string
	}

	cases := []testCase{
		{
			name: "Validates if not enabled",
			config: preprocessing.Config{
				Enabled: false,
			},
		},
		{
			name: "Validates with all required fields",
			config: preprocessing.Config{
				Enabled:    true,
				SharedPath: "/tmp",
				Temporal: preprocessing.Temporal{
					Namespace:    "default",
					TaskQueue:    "preprocessing",
					WorkflowName: "preprocessing",
				},
			},
		},
		{
			name: "Returns error if shared path is missing",
			config: preprocessing.Config{
				Enabled: true,
			},
			wantErr: "sharedPath is required in the [preprocessing] configuration",
		},
		{
			name: "Returns error if temporal config is missing",
			config: preprocessing.Config{
				Enabled:    true,
				SharedPath: "/tmp",
			},
			wantErr: "namespace, taskQueue and workflowName are required in the [preprocessing.temporal] configuration",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			err := tc.config.Validate()
			if tc.wantErr == "" {
				// No message expected: validation must succeed.
				assert.NilError(t, err)
				return
			}
			assert.Error(t, err, tc.wantErr)
		})
	}
}
7 changes: 4 additions & 3 deletions internal/workflow/activities/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ type DownloadActivity struct {
}

type DownloadActivityParams struct {
Key string
WatcherName string
Key string
WatcherName string
DestinationPath string
}

type DownloadActivityResult struct {
Expand All @@ -47,7 +48,7 @@ func (a *DownloadActivity) Execute(
"WatcherName", params.WatcherName,
)

destDir, err := os.MkdirTemp("", "enduro")
destDir, err := os.MkdirTemp(params.DestinationPath, "enduro")
if err != nil {
return &DownloadActivityResult{}, temporal_tools.NewNonRetryableError(fmt.Errorf("make temp dir: %v", err))
}
Expand Down
Loading

0 comments on commit cddeb0c

Please sign in to comment.