diff --git a/cmd/worker/workercmd/cmd.go b/cmd/worker/workercmd/cmd.go
index 602aaf5a..44b78d83 100644
--- a/cmd/worker/workercmd/cmd.go
+++ b/cmd/worker/workercmd/cmd.go
@@ -107,6 +107,10 @@ func (m *Main) Run(ctx context.Context) error {
 		xmlvalidate.New(xmlvalidate.NewXMLLintValidator()).Execute,
 		temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name},
 	)
+	w.RegisterActivityWithOptions(
+		activities.NewValidatePREMIS(xmlvalidate.NewXMLLintValidator()).Execute,
+		temporalsdk_activity.RegisterOptions{Name: activities.ValidatePREMISName},
+	)
 	w.RegisterActivityWithOptions(
 		activities.NewTransformSIP().Execute,
 		temporalsdk_activity.RegisterOptions{Name: activities.TransformSIPName},
diff --git a/go.mod b/go.mod
index b890468a..ed57bf06 100644
--- a/go.mod
+++ b/go.mod
@@ -15,6 +15,7 @@ require (
 	github.com/spf13/pflag v1.0.5
 	github.com/spf13/viper v1.18.2
 	github.com/stretchr/testify v1.9.0
+	github.com/tonglil/buflogr v1.1.1
 	go.artefactual.dev/tools v0.17.0
 	go.temporal.io/sdk v1.26.1
 	go.uber.org/mock v0.4.0
diff --git a/internal/activities/validate_premis.go b/internal/activities/validate_premis.go
new file mode 100644
index 00000000..38a3634e
--- /dev/null
+++ b/internal/activities/validate_premis.go
@@ -0,0 +1,120 @@
+package activities
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sync"
+
+	"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
+	"go.artefactual.dev/tools/temporal"
+
+	"github.com/artefactual-sdps/preprocessing-sfa/internal/fsutil"
+	"github.com/artefactual-sdps/preprocessing-sfa/internal/premis"
+)
+
+// ValidatePREMISName is the name the ValidatePREMIS activity is registered under.
+const ValidatePREMISName = "ValidatePREMIS"
+
+type (
+	// ValidatePREMIS is an activity that validates a PREMIS XML file against
+	// the embedded PREMIS v3 XSD.
+	ValidatePREMIS struct {
+		validator xmlvalidate.XSDValidator
+
+		// mu guards xsd, which is written lazily by xsdPath and may be
+		// reached concurrently when several executions share this value.
+		mu  sync.Mutex
+		xsd string
+	}
+
+	ValidatePREMISParams struct {
+		// Path of the PREMIS XML file to be validated.
+		Path string
+	}
+
+	ValidatePREMISResult struct {
+		// Failures holds one message per validation problem found; it is
+		// empty when the file exists and the validator reports nothing.
+		Failures []string
+	}
+)
+
+// NewValidatePREMIS returns a ValidatePREMIS activity that validates with v.
+func NewValidatePREMIS(v xmlvalidate.XSDValidator) *ValidatePREMIS {
+	return &ValidatePREMIS{validator: v}
+}
+
+// Execute validates the PREMIS file at params.Path against the PREMIS v3 XSD.
+//
+// A missing file, or any non-empty output from the validator, is reported as
+// an entry in the result's Failures with a nil error. A failure to prepare the
+// XSD file or to run the validator is returned as a wrapped error.
+func (a *ValidatePREMIS) Execute(ctx context.Context, params *ValidatePREMISParams) (*ValidatePREMISResult, error) {
+	var failures []string
+
+	logger := temporal.GetLogger(ctx)
+
+	if !fsutil.FileExists(params.Path) {
+		failures = append(
+			failures,
+			fmt.Sprintf("file not found: %s", filepath.Base(params.Path)),
+		)
+		return &ValidatePREMISResult{Failures: failures}, nil
+	}
+
+	xsd, err := a.xsdPath()
+	if err != nil {
+		return nil, fmt.Errorf("get PREMIS XSD path: %w", err)
+	}
+
+	out, err := a.validator.Validate(ctx, params.Path, xsd)
+	if err != nil {
+		return nil, fmt.Errorf("validate PREMIS: %w", err)
+	}
+	if out != "" {
+		// The raw validator output is only logged; the failure message
+		// carries just the file's base name.
+		logger.Info("PREMIS validation failed", "file", params.Path, "output", out)
+		failures = append(
+			failures,
+			fmt.Sprintf("%s does not match expected metadata requirements", filepath.Base(params.Path)),
+		)
+	}
+
+	return &ValidatePREMISResult{Failures: failures}, nil
+}
+
+// xsdPath returns the path to a local PREMIS v3 XSD file, creating the file if
+// necessary. The file is written once and its path is reused by later calls;
+// a.mu serializes callers so the file is not created twice.
+func (a *ValidatePREMIS) xsdPath() (string, error) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+
+	if a.xsd != "" {
+		return a.xsd, nil
+	}
+
+	f, err := os.CreateTemp("", "premis-v3-*.xsd")
+	if err != nil {
+		return "", err
+	}
+
+	// Close explicitly rather than with defer so that a failed Close is
+	// reported instead of being dropped.
+	_, err = f.Write(premis.XSDv3)
+	if cerr := f.Close(); err == nil {
+		err = cerr
+	}
+	if err != nil {
+		// Best effort: don't leave a partial XSD behind or cache its path.
+		_ = os.Remove(f.Name())
+		return "", err
+	}
+
+	a.xsd = f.Name()
+
+	return a.xsd, nil
+}
diff --git a/internal/activities/validate_premis_test.go b/internal/activities/validate_premis_test.go
new file mode 100644
index 00000000..1088b4bc
--- /dev/null
+++ b/internal/activities/validate_premis_test.go
@@ -0,0 +1,154 @@
+package activities_test
+
+import (
+	"bytes"
+	"context"
+	"errors"
+	"testing"
+
+	"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
+	"github.com/tonglil/buflogr"
+	"go.artefactual.dev/tools/temporal"
+	temporalsdk_activity "go.temporal.io/sdk/activity"
+	temporalsdk_interceptor "go.temporal.io/sdk/interceptor"
+	temporalsdk_testsuite "go.temporal.io/sdk/testsuite"
+	temporalsdk_worker "go.temporal.io/sdk/worker"
+	"gotest.tools/v3/assert"
+	"gotest.tools/v3/fs"
+
+	"github.com/artefactual-sdps/preprocessing-sfa/internal/activities"
+)
+
+var premisXML = `
+
+
+
+ uuid
+ c74a85b7-919b-409e-8209-9c7ebe0e7945
+
+
+
+
+
+
+
+
+ data/objects/test_transfer/content/cat.jpg
+
+
+`
+
+type fakeValidator struct {
+	Msg string
+	Err error
+}
+
+func (v *fakeValidator) Validate(ctx context.Context, xmlPath, xsdPath string) (string, error) {
+	return v.Msg, v.Err
+}
+
+func (v *fakeValidator) WithMsg(msg string) *fakeValidator {
+	v.Msg = msg
+	return v
+}
+
+func (v *fakeValidator) WithErr(err error) *fakeValidator {
+	v.Err = err
+	return v
+}
+
+func newFakeValidator() *fakeValidator {
+	return &fakeValidator{}
+}
+
+func TestValidatePREMIS(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name      string
+		validator xmlvalidate.XSDValidator
+		params    activities.ValidatePREMISParams
+		want      activities.ValidatePREMISResult
+		wantErr   string
+	}{
+		{
+			name:      "Validates a PREMIS file",
+			validator:
xmlvalidate.NewXMLLintValidator(), + params: activities.ValidatePREMISParams{ + Path: fs.NewDir(t, "enduro-test", + fs.WithFile("premis.xml", premisXML), + ).Join("premis.xml"), + }, + }, + { + name: "Returns a validation failure", + validator: newFakeValidator().WithMsg("premis.xml:12: parser error"), + params: activities.ValidatePREMISParams{ + Path: fs.NewDir(t, "enduro-test", + fs.WithFile("premis.xml", premisXML), + ).Join("premis.xml"), + }, + want: activities.ValidatePREMISResult{ + Failures: []string{"premis.xml does not match expected metadata requirements"}, + }, + }, + { + name: "Returns a file not found failure", + validator: newFakeValidator().WithErr(errors.New("file not found")), + params: activities.ValidatePREMISParams{ + Path: fs.NewDir(t, "enduro-test").Join("premis.xml"), + }, + want: activities.ValidatePREMISResult{ + Failures: []string{"file not found: premis.xml"}, + }, + }, + { + name: "Returns a system error", + validator: newFakeValidator().WithErr(errors.New("permission denied: premis.xml")), + params: activities.ValidatePREMISParams{ + Path: fs.NewDir(t, "enduro-test", + fs.WithFile("premis.xml", premisXML), + ).Join("premis.xml"), + }, + wantErr: "permission denied: premis.xml", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + var logbuf bytes.Buffer + logger := buflogr.NewWithBuffer(&logbuf) + + ts := &temporalsdk_testsuite.WorkflowTestSuite{} + env := ts.NewTestActivityEnvironment() + env.SetWorkerOptions(temporalsdk_worker.Options{ + Interceptors: []temporalsdk_interceptor.WorkerInterceptor{ + temporal.NewLoggerInterceptor(logger), + }, + }) + env.RegisterActivityWithOptions( + activities.NewValidatePREMIS(tt.validator).Execute, + temporalsdk_activity.RegisterOptions{Name: activities.ValidatePREMISName}, + ) + + enc, err := env.ExecuteActivity(activities.ValidatePREMISName, tt.params) + if tt.wantErr != "" { + if err == nil { + t.Errorf("error is nil, expecting: %q", tt.wantErr) + } else 
{ + assert.ErrorContains(t, err, tt.wantErr) + } + + return + } + + t.Log(logbuf.String()) // Echo log for debugging. + assert.NilError(t, err) + + var result activities.ValidatePREMISResult + _ = enc.Get(&result) + assert.DeepEqual(t, result, tt.want) + }) + } +} diff --git a/internal/premis/premis-v3-0.xsd b/internal/premis/premis-v3-0.xsd new file mode 100644 index 00000000..f8564a73 --- /dev/null +++ b/internal/premis/premis-v3-0.xsd @@ -0,0 +1,1224 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/internal/premis/xsdv3.go b/internal/premis/xsdv3.go new file mode 100644 index 00000000..abdf59e6 --- /dev/null +++ b/internal/premis/xsdv3.go @@ -0,0 +1,6 @@ +package premis + +import _ "embed" + +//go:embed premis-v3-0.xsd +var XSDv3 []byte diff --git a/internal/workflow/preprocessing.go b/internal/workflow/preprocessing.go index 8828e444..479d7544 100644 --- a/internal/workflow/preprocessing.go +++ b/internal/workflow/preprocessing.go @@ -294,6 +294,31 @@ func (w *PreprocessingWorkflow) Execute( ev.Succeed(ctx, "Metadata validation successful") } + // Validate logical metadata if SIP is an AIP type. + if identifySIP.SIP.IsAIP() { + ev = result.newEvent(ctx, "Validate logical metadata") + var validateLMD activities.ValidatePREMISResult + e = temporalsdk_workflow.ExecuteActivity( + withFilesysActOpts(ctx), + activities.ValidatePREMISName, + activities.ValidatePREMISParams{Path: identifySIP.SIP.LogicalMDPath}, + ).Get(ctx, &validateLMD) + if e != nil { + result.systemError(ctx, e, ev, "logical metadata validation has failed") + return result, nil + } + if validateLMD.Failures != nil { + result.validationError( + ctx, + ev, + "logical metadata validation has failed", + validateLMD.Failures, + ) + } else { + ev.Succeed(ctx, "Logical metadata validation successful") + } + } + // Stop here if the SIP content isn't valid. 
if result.Outcome == OutcomeContentError { return result, nil diff --git a/internal/workflow/preprocessing_test.go b/internal/workflow/preprocessing_test.go index f3645302..890e5920 100644 --- a/internal/workflow/preprocessing_test.go +++ b/internal/workflow/preprocessing_test.go @@ -175,6 +175,10 @@ func (s *PreprocessingTestSuite) SetupTest(cfg *config.Configuration) { xmlvalidate.New(nil).Execute, temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name}, ) + s.env.RegisterActivityWithOptions( + activities.NewValidatePREMIS(nil).Execute, + temporalsdk_activity.RegisterOptions{Name: activities.ValidatePREMISName}, + ) s.env.RegisterActivityWithOptions( activities.NewTransformSIP().Execute, temporalsdk_activity.RegisterOptions{Name: activities.TransformSIPName}, @@ -319,6 +323,14 @@ func (s *PreprocessingTestSuite) TestPreprocessingWorkflowSuccess() { &xmlvalidate.Result{}, nil, ) + s.env.OnActivity( + activities.ValidatePREMISName, + sessionCtx, + &activities.ValidatePREMISParams{Path: expectedSIP.LogicalMDPath}, + ).Return( + &activities.ValidatePREMISResult{}, nil, + ) + // PREMIS activities. 
s.env.OnActivity( activities.AddPREMISObjectsName, @@ -481,6 +493,13 @@ func (s *PreprocessingTestSuite) TestPreprocessingWorkflowSuccess() { StartedAt: testTime, CompletedAt: testTime, }, + { + Name: "Validate logical metadata", + Message: "Logical metadata validation successful", + Outcome: enums.EventOutcomeSuccess, + StartedAt: testTime, + CompletedAt: testTime, + }, { Name: "Create premis.xml", Message: "Created a premis.xml and stored in metadata directory", @@ -636,6 +655,16 @@ func (s *PreprocessingTestSuite) TestPreprocessingWorkflowValidationFails() { }, nil, ) + s.env.OnActivity( + activities.ValidatePREMISName, + sessionCtx, + &activities.ValidatePREMISParams{Path: expectedSIP.LogicalMDPath}, + ).Return( + &activities.ValidatePREMISResult{ + Failures: []string{`test-AIP-premis.xml does not match expected metadata requirements`}, + }, nil, + ) + s.env.ExecuteWorkflow( s.workflow.Execute, &workflow.PreprocessingWorkflowParams{RelativePath: relPath}, @@ -708,6 +737,14 @@ invalid PDF/A: "contents/contents/d_0000001/test.pdf"`, StartedAt: testTime, CompletedAt: testTime, }, + { + Name: "Validate logical metadata", + Message: `Content error: logical metadata validation has failed: +test-AIP-premis.xml does not match expected metadata requirements`, + Outcome: enums.EventOutcomeValidationFailure, + StartedAt: testTime, + CompletedAt: testTime, + }, }, }, &result,