diff --git a/cmd/worker/workercmd/cmd.go b/cmd/worker/workercmd/cmd.go
index 602aaf5a..44b78d83 100644
--- a/cmd/worker/workercmd/cmd.go
+++ b/cmd/worker/workercmd/cmd.go
@@ -107,6 +107,10 @@ func (m *Main) Run(ctx context.Context) error {
xmlvalidate.New(xmlvalidate.NewXMLLintValidator()).Execute,
temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name},
)
+ w.RegisterActivityWithOptions(
+ activities.NewValidatePREMIS(xmlvalidate.NewXMLLintValidator()).Execute,
+ temporalsdk_activity.RegisterOptions{Name: activities.ValidatePREMISName},
+ )
w.RegisterActivityWithOptions(
activities.NewTransformSIP().Execute,
temporalsdk_activity.RegisterOptions{Name: activities.TransformSIPName},
diff --git a/go.mod b/go.mod
index b890468a..ed57bf06 100644
--- a/go.mod
+++ b/go.mod
@@ -15,6 +15,7 @@ require (
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.18.2
github.com/stretchr/testify v1.9.0
+ github.com/tonglil/buflogr v1.1.1
go.artefactual.dev/tools v0.17.0
go.temporal.io/sdk v1.26.1
go.uber.org/mock v0.4.0
diff --git a/internal/activities/validate_premis.go b/internal/activities/validate_premis.go
new file mode 100644
index 00000000..38a3634e
--- /dev/null
+++ b/internal/activities/validate_premis.go
@@ -0,0 +1,124 @@
+package activities
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sync"
+
+	"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
+	"go.artefactual.dev/tools/temporal"
+
+	"github.com/artefactual-sdps/preprocessing-sfa/internal/fsutil"
+	"github.com/artefactual-sdps/preprocessing-sfa/internal/premis"
+)
+
+// ValidatePREMISName is the name under which the activity is registered.
+const ValidatePREMISName = "ValidatePREMIS"
+
+type (
+	// ValidatePREMIS is an activity that validates a PREMIS XML file against
+	// the embedded PREMIS v3 XSD.
+	ValidatePREMIS struct {
+		validator xmlvalidate.XSDValidator
+
+		// mu guards xsd, which xsdPath initializes lazily. The same activity
+		// value serves every execution, and the worker may run executions
+		// concurrently.
+		mu  sync.Mutex
+		xsd string
+	}
+
+	// ValidatePREMISParams is the input of the ValidatePREMIS activity.
+	ValidatePREMISParams struct {
+		// Path of the PREMIS XML file to be validated.
+		Path string
+	}
+
+	// ValidatePREMISResult is the output of the ValidatePREMIS activity.
+	ValidatePREMISResult struct {
+		// Failures lists the validation failure messages. It is nil when no
+		// failure was found.
+		Failures []string
+	}
+)
+
+// NewValidatePREMIS returns a ValidatePREMIS activity that validates files
+// with v.
+func NewValidatePREMIS(v xmlvalidate.XSDValidator) *ValidatePREMIS {
+	return &ValidatePREMIS{validator: v}
+}
+
+// Execute validates the given PREMIS file against an XSD.
+//
+// A file that fsutil.FileExists does not find, or that produces validator
+// output, is reported through ValidatePREMISResult.Failures with a nil error.
+// An error is returned only when the XSD file can not be prepared or the
+// validator itself returns an error.
+func (a *ValidatePREMIS) Execute(ctx context.Context, params *ValidatePREMISParams) (*ValidatePREMISResult, error) {
+	logger := temporal.GetLogger(ctx)
+	name := filepath.Base(params.Path)
+
+	if !fsutil.FileExists(params.Path) {
+		return &ValidatePREMISResult{
+			Failures: []string{fmt.Sprintf("file not found: %s", name)},
+		}, nil
+	}
+
+	xsd, err := a.xsdPath()
+	if err != nil {
+		return nil, fmt.Errorf("get PREMIS XSD path: %w", err)
+	}
+
+	out, err := a.validator.Validate(ctx, params.Path, xsd)
+	if err != nil {
+		return nil, fmt.Errorf("validate PREMIS: %w", err)
+	}
+
+	// Any validator output marks the file as invalid. The raw output is only
+	// logged; the returned failure message does not include it.
+	var failures []string
+	if out != "" {
+		logger.Info("PREMIS validation failed", "file", params.Path, "output", out)
+		failures = append(
+			failures,
+			fmt.Sprintf("%s does not match expected metadata requirements", name),
+		)
+	}
+
+	return &ValidatePREMISResult{Failures: failures}, nil
+}
+
+// xsdPath returns the path to a local PREMIS v3 XSD file, creating the file if
+// necessary. The file is written once per activity value to the default
+// temporary directory; nothing in this file removes it afterwards.
+func (a *ValidatePREMIS) xsdPath() (string, error) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+
+	if a.xsd != "" {
+		return a.xsd, nil
+	}
+
+	f, err := os.CreateTemp("", "premis-v3-*.xsd")
+	if err != nil {
+		return "", err
+	}
+
+	// Cache the path only once the schema is fully written and closed. On
+	// failure the partial file is removed (best effort) so it is not leaked.
+	if _, err := f.Write(premis.XSDv3); err != nil {
+		_ = f.Close()
+		_ = os.Remove(f.Name())
+		return "", err
+	}
+	if err := f.Close(); err != nil {
+		_ = os.Remove(f.Name())
+		return "", err
+	}
+
+	a.xsd = f.Name()
+
+	return a.xsd, nil
+}
diff --git a/internal/activities/validate_premis_test.go b/internal/activities/validate_premis_test.go
new file mode 100644
index 00000000..1088b4bc
--- /dev/null
+++ b/internal/activities/validate_premis_test.go
@@ -0,0 +1,163 @@
+package activities_test
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"testing"
+
+	"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
+	"github.com/tonglil/buflogr"
+	"go.artefactual.dev/tools/temporal"
+	temporalsdk_activity "go.temporal.io/sdk/activity"
+	temporalsdk_interceptor "go.temporal.io/sdk/interceptor"
+	temporalsdk_testsuite "go.temporal.io/sdk/testsuite"
+	temporalsdk_worker "go.temporal.io/sdk/worker"
+	"gotest.tools/v3/assert"
+	"gotest.tools/v3/fs"
+
+	"github.com/artefactual-sdps/preprocessing-sfa/internal/activities"
+)
+
+// premisXML is the content of the premis.xml fixture written by the test cases.
+var premisXML = `
+
+
+
+ uuid
+ c74a85b7-919b-409e-8209-9c7ebe0e7945
+
+
+
+
+
+
+
+
+ data/objects/test_transfer/content/cat.jpg
+
+
+`
+
+// fakeValidator is a test double for xmlvalidate.XSDValidator that returns
+// preset values.
+type fakeValidator struct {
+	Msg string // Output returned by Validate.
+	Err error  // Error returned by Validate.
+}
+
+// Validate ignores its arguments and returns the preset Msg and Err.
+func (v *fakeValidator) Validate(ctx context.Context, xmlPath, xsdPath string) (string, error) {
+	return v.Msg, v.Err
+}
+
+// WithMsg sets the output returned by Validate and returns v for chaining.
+func (v *fakeValidator) WithMsg(msg string) *fakeValidator {
+	v.Msg = msg
+	return v
+}
+
+// WithErr sets the error returned by Validate and returns v for chaining.
+func (v *fakeValidator) WithErr(err error) *fakeValidator {
+	v.Err = err
+	return v
+}
+
+// newFakeValidator returns a fakeValidator with no preset output or error.
+func newFakeValidator() *fakeValidator {
+	return &fakeValidator{}
+}
+
+// TestValidatePREMIS runs the ValidatePREMIS activity in a Temporal test
+// activity environment and checks the returned result or error.
+func TestValidatePREMIS(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name      string
+		validator xmlvalidate.XSDValidator
+		params    activities.ValidatePREMISParams
+		want      activities.ValidatePREMISResult
+		wantErr   string
+	}{
+		{
+			name:      "Validates a PREMIS file",
+			validator: xmlvalidate.NewXMLLintValidator(),
+			params: activities.ValidatePREMISParams{
+				Path: fs.NewDir(t, "enduro-test",
+					fs.WithFile("premis.xml", premisXML),
+				).Join("premis.xml"),
+			},
+		},
+		{
+			name:      "Returns a validation failure",
+			validator: newFakeValidator().WithMsg("premis.xml:12: parser error"),
+			params: activities.ValidatePREMISParams{
+				Path: fs.NewDir(t, "enduro-test",
+					fs.WithFile("premis.xml", premisXML),
+				).Join("premis.xml"),
+			},
+			want: activities.ValidatePREMISResult{
+				Failures: []string{"premis.xml does not match expected metadata requirements"},
+			},
+		},
+		{
+			name:      "Returns a file not found failure",
+			validator: newFakeValidator().WithErr(errors.New("file not found")),
+			params: activities.ValidatePREMISParams{
+				Path: fs.NewDir(t, "enduro-test").Join("premis.xml"),
+			},
+			want: activities.ValidatePREMISResult{
+				Failures: []string{"file not found: premis.xml"},
+			},
+		},
+		{
+			name:      "Returns a system error",
+			validator: newFakeValidator().WithErr(errors.New("permission denied: premis.xml")),
+			params: activities.ValidatePREMISParams{
+				Path: fs.NewDir(t, "enduro-test",
+					fs.WithFile("premis.xml", premisXML),
+				).Join("premis.xml"),
+			},
+			wantErr: "permission denied: premis.xml",
+		},
+	}
+	// NOTE(review): tt is captured by parallel subtests, which is only safe with
+	// per-iteration loop variables (Go 1.22+) — confirm the go directive in go.mod.
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			var logbuf bytes.Buffer
+			logger := buflogr.NewWithBuffer(&logbuf)
+
+			ts := &temporalsdk_testsuite.WorkflowTestSuite{}
+			env := ts.NewTestActivityEnvironment()
+			env.SetWorkerOptions(temporalsdk_worker.Options{
+				Interceptors: []temporalsdk_interceptor.WorkerInterceptor{
+					temporal.NewLoggerInterceptor(logger),
+				},
+			})
+			env.RegisterActivityWithOptions(
+				activities.NewValidatePREMIS(tt.validator).Execute,
+				temporalsdk_activity.RegisterOptions{Name: activities.ValidatePREMISName},
+			)
+
+			enc, err := env.ExecuteActivity(activities.ValidatePREMISName, tt.params)
+			t.Log(logbuf.String()) // Echo log for debugging.
+
+			if tt.wantErr != "" {
+				// ErrorContains also fails the test when err is nil.
+				assert.ErrorContains(t, err, tt.wantErr)
+				return
+			}
+			assert.NilError(t, err)
+
+			// Check the decode error so a broken payload is not silently compared
+			// as a zero-value result.
+			var result activities.ValidatePREMISResult
+			assert.NilError(t, enc.Get(&result))
+			assert.DeepEqual(t, result, tt.want)
+		})
+	}
+}
diff --git a/internal/premis/premis-v3-0.xsd b/internal/premis/premis-v3-0.xsd
new file mode 100644
index 00000000..f8564a73
--- /dev/null
+++ b/internal/premis/premis-v3-0.xsd
@@ -0,0 +1,1224 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/internal/premis/xsdv3.go b/internal/premis/xsdv3.go
new file mode 100644
index 00000000..abdf59e6
--- /dev/null
+++ b/internal/premis/xsdv3.go
@@ -0,0 +1,9 @@
+package premis
+
+import _ "embed"
+
+// XSDv3 holds the content of premis-v3-0.xsd, embedded at build time. The
+// slice is shared package state, so callers should treat it as read-only.
+//
+//go:embed premis-v3-0.xsd
+var XSDv3 []byte
diff --git a/internal/workflow/preprocessing.go b/internal/workflow/preprocessing.go
index 8828e444..479d7544 100644
--- a/internal/workflow/preprocessing.go
+++ b/internal/workflow/preprocessing.go
@@ -294,6 +294,31 @@ func (w *PreprocessingWorkflow) Execute(
ev.Succeed(ctx, "Metadata validation successful")
}
+	// Validate logical metadata for AIPs; only a non-empty failure list is a validation error.
+	if identifySIP.SIP.IsAIP() {
+		ev = result.newEvent(ctx, "Validate logical metadata")
+		var validateLMD activities.ValidatePREMISResult
+		e = temporalsdk_workflow.ExecuteActivity(
+			withFilesysActOpts(ctx),
+			activities.ValidatePREMISName,
+			activities.ValidatePREMISParams{Path: identifySIP.SIP.LogicalMDPath},
+		).Get(ctx, &validateLMD)
+		if e != nil {
+			result.systemError(ctx, e, ev, "logical metadata validation has failed")
+			return result, nil
+		}
+		if len(validateLMD.Failures) > 0 {
+			result.validationError(
+				ctx,
+				ev,
+				"logical metadata validation has failed",
+				validateLMD.Failures,
+			)
+		} else {
+			ev.Succeed(ctx, "Logical metadata validation successful")
+		}
+	}
+
// Stop here if the SIP content isn't valid.
if result.Outcome == OutcomeContentError {
return result, nil
diff --git a/internal/workflow/preprocessing_test.go b/internal/workflow/preprocessing_test.go
index f3645302..890e5920 100644
--- a/internal/workflow/preprocessing_test.go
+++ b/internal/workflow/preprocessing_test.go
@@ -175,6 +175,10 @@ func (s *PreprocessingTestSuite) SetupTest(cfg *config.Configuration) {
xmlvalidate.New(nil).Execute,
temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name},
)
+ s.env.RegisterActivityWithOptions(
+ activities.NewValidatePREMIS(nil).Execute,
+ temporalsdk_activity.RegisterOptions{Name: activities.ValidatePREMISName},
+ )
s.env.RegisterActivityWithOptions(
activities.NewTransformSIP().Execute,
temporalsdk_activity.RegisterOptions{Name: activities.TransformSIPName},
@@ -319,6 +323,14 @@ func (s *PreprocessingTestSuite) TestPreprocessingWorkflowSuccess() {
&xmlvalidate.Result{}, nil,
)
+ s.env.OnActivity(
+ activities.ValidatePREMISName,
+ sessionCtx,
+ &activities.ValidatePREMISParams{Path: expectedSIP.LogicalMDPath},
+ ).Return(
+ &activities.ValidatePREMISResult{}, nil,
+ )
+
// PREMIS activities.
s.env.OnActivity(
activities.AddPREMISObjectsName,
@@ -481,6 +493,13 @@ func (s *PreprocessingTestSuite) TestPreprocessingWorkflowSuccess() {
StartedAt: testTime,
CompletedAt: testTime,
},
+ {
+ Name: "Validate logical metadata",
+ Message: "Logical metadata validation successful",
+ Outcome: enums.EventOutcomeSuccess,
+ StartedAt: testTime,
+ CompletedAt: testTime,
+ },
{
Name: "Create premis.xml",
Message: "Created a premis.xml and stored in metadata directory",
@@ -636,6 +655,16 @@ func (s *PreprocessingTestSuite) TestPreprocessingWorkflowValidationFails() {
}, nil,
)
+ s.env.OnActivity(
+ activities.ValidatePREMISName,
+ sessionCtx,
+ &activities.ValidatePREMISParams{Path: expectedSIP.LogicalMDPath},
+ ).Return(
+ &activities.ValidatePREMISResult{
+ Failures: []string{`test-AIP-premis.xml does not match expected metadata requirements`},
+ }, nil,
+ )
+
s.env.ExecuteWorkflow(
s.workflow.Execute,
&workflow.PreprocessingWorkflowParams{RelativePath: relPath},
@@ -708,6 +737,14 @@ invalid PDF/A: "contents/contents/d_0000001/test.pdf"`,
StartedAt: testTime,
CompletedAt: testTime,
},
+ {
+ Name: "Validate logical metadata",
+ Message: `Content error: logical metadata validation has failed:
+test-AIP-premis.xml does not match expected metadata requirements`,
+ Outcome: enums.EventOutcomeValidationFailure,
+ StartedAt: testTime,
+ CompletedAt: testTime,
+ },
},
},
&result,