Skip to content

Commit

Permalink
Validate the logical metadata file
Browse files Browse the repository at this point in the history
Refs #98.

Add an activity to validate the logical metadata PREMIS XML file against
a local copy of the PREMIS v3 XSD. To avoid downloading the PREMIS XSD
every time the activity runs I've embedded a local copy of the XSD in
the worker Go binary, and write the contents to a file on disk so it can
be loaded by xmllint.
  • Loading branch information
djjuhasz committed Dec 17, 2024
1 parent b9974bc commit 2d7f5ea
Show file tree
Hide file tree
Showing 8 changed files with 1,543 additions and 0 deletions.
4 changes: 4 additions & 0 deletions cmd/worker/workercmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ func (m *Main) Run(ctx context.Context) error {
xmlvalidate.New(xmlvalidate.NewXMLLintValidator()).Execute,
temporalsdk_activity.RegisterOptions{Name: xmlvalidate.Name},
)
w.RegisterActivityWithOptions(
activities.NewValidatePREMIS(xmlvalidate.NewXMLLintValidator()).Execute,
temporalsdk_activity.RegisterOptions{Name: activities.ValidatePREMISName},
)
w.RegisterActivityWithOptions(
activities.NewTransformSIP().Execute,
temporalsdk_activity.RegisterOptions{Name: activities.TransformSIPName},
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.18.2
github.com/stretchr/testify v1.9.0
github.com/tonglil/buflogr v1.1.1
go.artefactual.dev/tools v0.17.0
go.temporal.io/sdk v1.26.1
go.uber.org/mock v0.4.0
Expand Down
92 changes: 92 additions & 0 deletions internal/activities/validate_premis.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package activities

import (
"context"
"fmt"
"os"
"path/filepath"

"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
"go.artefactual.dev/tools/temporal"

"github.com/artefactual-sdps/preprocessing-sfa/internal/fsutil"
"github.com/artefactual-sdps/preprocessing-sfa/internal/premis"
)

// ValidatePREMISName is the name under which the ValidatePREMIS activity is
// registered with the Temporal worker.
const ValidatePREMISName = "ValidatePREMIS"

// ValidatePREMIS is an activity that validates a PREMIS XML file against an
// XSD using the configured validator.
type ValidatePREMIS struct {
	// validator performs the XSD validation.
	validator xmlvalidate.XSDValidator
	// xsd caches the path of the on-disk XSD file once xsdPath has created it.
	xsd string
}

// ValidatePREMISParams holds the input of the ValidatePREMIS activity.
type ValidatePREMISParams struct {
	// Path of the PREMIS XML file to be validated.
	Path string
}

// ValidatePREMISResult holds the output of the ValidatePREMIS activity.
type ValidatePREMISResult struct {
	// Failures lists the validation problems found; it is empty when the
	// file passed validation.
	Failures []string
}

// NewValidatePREMIS returns a ValidatePREMIS activity that delegates XSD
// validation to v.
func NewValidatePREMIS(v xmlvalidate.XSDValidator) *ValidatePREMIS {
	activity := new(ValidatePREMIS)
	activity.validator = v

	return activity
}

// Execute validates the PREMIS XML file at params.Path against the PREMIS XSD.
//
// Problems with the submitted file are reported through the result's Failures
// rather than as an error: a missing file yields a "file not found" failure,
// and any non-empty validator output yields a generic "does not match"
// failure while the full validator output is written to the activity log.
//
// A non-nil error is returned only when the XSD file cannot be prepared or
// the validator itself fails. The underlying error is wrapped with %w so that
// callers can inspect it with errors.Is or errors.As.
func (a *ValidatePREMIS) Execute(ctx context.Context, params *ValidatePREMISParams) (*ValidatePREMISResult, error) {
	var failures []string

	logger := temporal.GetLogger(ctx)

	// Only the base name is reported so failure messages do not expose the
	// worker's local directory layout.
	name := filepath.Base(params.Path)

	if !fsutil.FileExists(params.Path) {
		failures = append(failures, fmt.Sprintf("file not found: %s", name))
		return &ValidatePREMISResult{Failures: failures}, nil
	}

	xsd, err := a.xsdPath()
	if err != nil {
		return nil, fmt.Errorf("get PREMIS XSD path: %w", err)
	}

	out, err := a.validator.Validate(ctx, params.Path, xsd)
	if err != nil {
		return nil, fmt.Errorf("validate PREMIS: %w", err)
	}

	// Any validator output is treated as a validation failure. The detailed
	// output goes to the log; the returned failure message stays generic.
	if out != "" {
		logger.Info("PREMIS validation failed", "file", params.Path, "output", out)
		failures = append(
			failures,
			fmt.Sprintf("%s does not match expected metadata requirements", name),
		)
	}

	return &ValidatePREMISResult{Failures: failures}, nil
}

// xsdPath returns the path to a local PREMIS v3 XSD file, creating the file if
// necessary.
//
// The embedded schema (premis.XSDv3) is written to a new temporary file and
// the resulting path is cached on the activity. The cached path is reused
// only while the file can still be stat'ed, so a copy that has been removed
// from the temp directory is recreated instead of handing the validator a
// dangling path.
//
// If writing or closing the file fails, the partial file is removed and
// nothing is cached, so a later call starts from scratch.
//
// NOTE(review): a.xsd is read and written here without synchronization;
// confirm whether Execute can run concurrently on a single ValidatePREMIS
// instance.
func (a *ValidatePREMIS) xsdPath() (string, error) {
	if a.xsd != "" {
		if _, err := os.Stat(a.xsd); err == nil {
			return a.xsd, nil
		}
		// The cached file is gone or inaccessible; fall through and write a
		// fresh copy.
	}

	f, err := os.CreateTemp("", "premis-v3-*.xsd")
	if err != nil {
		return "", fmt.Errorf("create temp file: %w", err)
	}
	name := f.Name()

	if _, err := f.Write(premis.XSDv3); err != nil {
		// Best-effort cleanup: the write error is the one worth reporting.
		_ = f.Close()
		_ = os.Remove(name)
		return "", fmt.Errorf("write %s: %w", name, err)
	}

	// Close explicitly rather than via defer so that a failed flush is
	// reported instead of caching a possibly truncated schema.
	if err := f.Close(); err != nil {
		_ = os.Remove(name) // Best-effort cleanup of the unusable file.
		return "", fmt.Errorf("close %s: %w", name, err)
	}

	a.xsd = name

	return name, nil
}
154 changes: 154 additions & 0 deletions internal/activities/validate_premis_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
package activities_test

import (
"bytes"
"context"
"errors"
"testing"

"github.com/artefactual-sdps/temporal-activities/xmlvalidate"
"github.com/tonglil/buflogr"
"go.artefactual.dev/tools/temporal"
temporalsdk_activity "go.temporal.io/sdk/activity"
temporalsdk_interceptor "go.temporal.io/sdk/interceptor"
temporalsdk_testsuite "go.temporal.io/sdk/testsuite"
temporalsdk_worker "go.temporal.io/sdk/worker"
"gotest.tools/v3/assert"
"gotest.tools/v3/fs"

"github.com/artefactual-sdps/preprocessing-sfa/internal/activities"
)

// premisXML is a small PREMIS v3 document describing a single file object.
// The tests write it to disk as premis.xml and pass that path to the
// activity under test.
var premisXML = `<?xml version="1.0" encoding="UTF-8"?>
<premis:premis xmlns:premis="http://www.loc.gov/premis/v3" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/premis/v3 https://www.loc.gov/standards/premis/premis.xsd" version="3.0">
<premis:object xsi:type="premis:file">
<premis:objectIdentifier>
<premis:objectIdentifierType>uuid</premis:objectIdentifierType>
<premis:objectIdentifierValue>c74a85b7-919b-409e-8209-9c7ebe0e7945</premis:objectIdentifierValue>
</premis:objectIdentifier>
<premis:objectCharacteristics>
<premis:format>
<premis:formatDesignation>
<premis:formatName/>
</premis:formatDesignation>
</premis:format>
</premis:objectCharacteristics>
<premis:originalName>data/objects/test_transfer/content/cat.jpg</premis:originalName>
</premis:object>
</premis:premis>
`

// fakeValidator is a test double for an XSD validator: Validate ignores its
// arguments and returns the canned Msg and Err values.
type fakeValidator struct {
	Msg string // Output returned by Validate.
	Err error  // Error returned by Validate.
}

// newFakeValidator returns a fakeValidator whose Validate reports no output
// and no error until configured with WithMsg or WithErr.
func newFakeValidator() *fakeValidator {
	return new(fakeValidator)
}

// WithMsg sets the output that Validate returns and hands back the same
// fakeValidator so calls can be chained.
func (fv *fakeValidator) WithMsg(msg string) *fakeValidator {
	fv.Msg = msg
	return fv
}

// WithErr sets the error that Validate returns and hands back the same
// fakeValidator so calls can be chained.
func (fv *fakeValidator) WithErr(err error) *fakeValidator {
	fv.Err = err
	return fv
}

// Validate returns the configured output and error without touching the
// context or either path.
func (fv *fakeValidator) Validate(_ context.Context, _, _ string) (string, error) {
	return fv.Msg, fv.Err
}

// TestValidatePREMIS runs the ValidatePREMIS activity in a Temporal test
// activity environment for each table case and checks either the returned
// result or the returned error.
//
// The first case uses the validator returned by
// xmlvalidate.NewXMLLintValidator; the remaining cases use fakeValidator with
// canned output.
func TestValidatePREMIS(t *testing.T) {
	t.Parallel()

	tests := []struct {
		name      string
		validator xmlvalidate.XSDValidator
		params    activities.ValidatePREMISParams
		want      activities.ValidatePREMISResult
		wantErr   string
	}{
		{
			name:      "Validates a PREMIS file",
			validator: xmlvalidate.NewXMLLintValidator(),
			params: activities.ValidatePREMISParams{
				Path: fs.NewDir(t, "enduro-test",
					fs.WithFile("premis.xml", premisXML),
				).Join("premis.xml"),
			},
		},
		{
			name:      "Returns a validation failure",
			validator: newFakeValidator().WithMsg("premis.xml:12: parser error"),
			params: activities.ValidatePREMISParams{
				Path: fs.NewDir(t, "enduro-test",
					fs.WithFile("premis.xml", premisXML),
				).Join("premis.xml"),
			},
			want: activities.ValidatePREMISResult{
				Failures: []string{"premis.xml does not match expected metadata requirements"},
			},
		},
		{
			name:      "Returns a file not found failure",
			validator: newFakeValidator().WithErr(errors.New("file not found")),
			params: activities.ValidatePREMISParams{
				Path: fs.NewDir(t, "enduro-test").Join("premis.xml"),
			},
			want: activities.ValidatePREMISResult{
				Failures: []string{"file not found: premis.xml"},
			},
		},
		{
			name:      "Returns a system error",
			validator: newFakeValidator().WithErr(errors.New("permission denied: premis.xml")),
			params: activities.ValidatePREMISParams{
				Path: fs.NewDir(t, "enduro-test",
					fs.WithFile("premis.xml", premisXML),
				).Join("premis.xml"),
			},
			wantErr: "permission denied: premis.xml",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			var logbuf bytes.Buffer
			logger := buflogr.NewWithBuffer(&logbuf)

			ts := &temporalsdk_testsuite.WorkflowTestSuite{}
			env := ts.NewTestActivityEnvironment()
			env.SetWorkerOptions(temporalsdk_worker.Options{
				Interceptors: []temporalsdk_interceptor.WorkerInterceptor{
					temporal.NewLoggerInterceptor(logger),
				},
			})
			env.RegisterActivityWithOptions(
				activities.NewValidatePREMIS(tt.validator).Execute,
				temporalsdk_activity.RegisterOptions{Name: activities.ValidatePREMISName},
			)

			enc, err := env.ExecuteActivity(activities.ValidatePREMISName, tt.params)

			// Echo the captured log for debugging, on the error path too.
			t.Log(logbuf.String())

			if tt.wantErr != "" {
				// ErrorContains also fails the test when err is nil.
				assert.ErrorContains(t, err, tt.wantErr)
				return
			}
			assert.NilError(t, err)

			// Check the decode error so a failed decode is reported as such
			// rather than as a confusing mismatch against a zero-value result.
			var result activities.ValidatePREMISResult
			assert.NilError(t, enc.Get(&result))
			assert.DeepEqual(t, result, tt.want)
		})
	}
}
Loading

0 comments on commit 2d7f5ea

Please sign in to comment.