Skip to content

Commit

Permalink
Fix born digital SIP PREMIS event recording (#19)
Browse files Browse the repository at this point in the history
Added support for born digital SIPs to PREMIS event recording.
  • Loading branch information
mcantelon committed Aug 28, 2024
1 parent 57d0193 commit 2eb4b55
Show file tree
Hide file tree
Showing 8 changed files with 94 additions and 33 deletions.
7 changes: 4 additions & 3 deletions internal/activities/add_premis_objects.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@ import (
"github.com/google/uuid"

"github.com/artefactual-sdps/preprocessing-sfa/internal/premis"
"github.com/artefactual-sdps/preprocessing-sfa/internal/sip"
)

const AddPREMISObjectsName = "add-premis-objects"

type AddPREMISObjectsParams struct {
SIP sip.SIP
PREMISFilePath string
ContentPath string
}

type AddPREMISObjectsResult struct{}
Expand All @@ -30,7 +31,7 @@ func (md *AddPREMISObjectsActivity) Execute(
params *AddPREMISObjectsParams,
) (*AddPREMISObjectsResult, error) {
// Get subpaths of files in transfer.
subpaths, err := premis.FilesWithinDirectory(params.ContentPath)
subpaths, err := premis.FilesWithinDirectory(params.SIP.ContentPath)
if err != nil {
return nil, err
}
Expand All @@ -50,7 +51,7 @@ func (md *AddPREMISObjectsActivity) Execute(
object := premis.Object{
IdType: "UUID",
IdValue: uuid.New().String(),
OriginalName: premis.OriginalNameForSubpath(params.ContentPath, subpath),
OriginalName: premis.OriginalNameForSubpath(params.SIP, subpath),
}

err = premis.AppendObjectXML(doc, object)
Expand Down
19 changes: 15 additions & 4 deletions internal/activities/add_premis_objects_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ import (
"gotest.tools/v3/fs"

"github.com/artefactual-sdps/preprocessing-sfa/internal/activities"
"github.com/artefactual-sdps/preprocessing-sfa/internal/enums"
"github.com/artefactual-sdps/preprocessing-sfa/internal/premis"
"github.com/artefactual-sdps/preprocessing-sfa/internal/sip"
)

func TestAddPREMISObjects(t *testing.T) {
Expand Down Expand Up @@ -62,24 +64,33 @@ func TestAddPREMISObjects(t *testing.T) {
{
name: "Add PREMIS objects for normal content",
params: activities.AddPREMISObjectsParams{
SIP: sip.SIP{
Type: enums.SIPTypeDigitizedAIP,
ContentPath: ContentFilesNormal.Path(),
},
PREMISFilePath: PREMISFilePathNormal,
ContentPath: ContentFilesNormal.Path(),
},
result: activities.AddPREMISObjectsResult{},
},
{
name: "Add PREMIS objects for no content",
params: activities.AddPREMISObjectsParams{
SIP: sip.SIP{
Type: enums.SIPTypeDigitizedAIP,
ContentPath: ContentNoFiles.Path(),
},
PREMISFilePath: PREMISFilePathNoFiles,
ContentPath: ContentNoFiles.Path(),
},
result: activities.AddPREMISObjectsResult{},
},
{
name: "Add PREMIS objects for bad path",
params: activities.AddPREMISObjectsParams{
SIP: sip.SIP{
Type: enums.SIPTypeDigitizedAIP,
ContentPath: ContentNonExistent.Path(),
},
PREMISFilePath: PREMISFilePathNonExistent,
ContentPath: ContentNonExistent.Path(),
},
result: activities.AddPREMISObjectsResult{},
wantErr: "no such file or directory",
Expand Down Expand Up @@ -116,7 +127,7 @@ func TestAddPREMISObjects(t *testing.T) {
assert.DeepEqual(t, res, tt.result)

// If the content directory has files, make sure that the PREMIS file can be parsed.
contentFiles, err := premis.FilesWithinDirectory(tt.params.ContentPath)
contentFiles, err := premis.FilesWithinDirectory(tt.params.SIP.ContentPath)
assert.NilError(t, err)

if len(contentFiles) > 0 {
Expand Down
9 changes: 5 additions & 4 deletions internal/activities/validate_file_formats.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ import (

"github.com/artefactual-sdps/preprocessing-sfa/internal/fformat"
"github.com/artefactual-sdps/preprocessing-sfa/internal/premis"
"github.com/artefactual-sdps/preprocessing-sfa/internal/sip"
)

const ValidateFileFormatsName = "validate-file-formats"

type ValidateFileFormatsParams struct {
ContentPath string
SIP sip.SIP
PREMISFilePath string
Agent premis.Agent
}
Expand Down Expand Up @@ -72,7 +73,7 @@ func (a *ValidateFileFormats) Execute(
"fmt/653": {},
}

err := filepath.WalkDir(params.ContentPath, func(p string, d fs.DirEntry, err error) error {
err := filepath.WalkDir(params.SIP.ContentPath, func(p string, d fs.DirEntry, err error) error {
if err != nil {
return err
}
Expand Down Expand Up @@ -106,13 +107,13 @@ func (a *ValidateFileFormats) Execute(
}

// Get subpath within content.
subpath, err := filepath.Rel(params.ContentPath, p)
subpath, err := filepath.Rel(params.SIP.ContentPath, p)
if err != nil {
return err
}

// Append PREMIS event to XML and write results.
originalName := premis.OriginalNameForSubpath(params.ContentPath, subpath)
originalName := premis.OriginalNameForSubpath(params.SIP, subpath)

doc, err := premis.ParseOrInitialize(params.PREMISFilePath)
if err != nil {
Expand Down
26 changes: 20 additions & 6 deletions internal/activities/validate_file_formats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ import (
"gotest.tools/v3/fs"

"github.com/artefactual-sdps/preprocessing-sfa/internal/activities"
"github.com/artefactual-sdps/preprocessing-sfa/internal/enums"
"github.com/artefactual-sdps/preprocessing-sfa/internal/premis"
"github.com/artefactual-sdps/preprocessing-sfa/internal/sip"
)

const pngContent = "\x89PNG\r\n\x1a\n\x00\x00\x00\x0DIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90\x77\x53\xDE\x00\x00\x00\x00IEND\xAE\x42\x60\x82"
Expand Down Expand Up @@ -127,7 +129,10 @@ func TestValidateFileFormats(t *testing.T) {
{
name: "Successes with valid formats",
params: activities.ValidateFileFormatsParams{
ContentPath: validFormatsContentPath,
SIP: sip.SIP{
Type: enums.SIPTypeDigitizedAIP,
ContentPath: validFormatsContentPath,
},
PREMISFilePath: fs.NewFile(t, "premis.xml",
fs.WithContent(premisValidFormatsContent),
).Path(),
Expand All @@ -138,7 +143,10 @@ func TestValidateFileFormats(t *testing.T) {
{
name: "Fails with invalid formats",
params: activities.ValidateFileFormatsParams{
ContentPath: invalidFormatsContentPath,
SIP: sip.SIP{
Type: enums.SIPTypeDigitizedAIP,
ContentPath: invalidFormatsContentPath,
},
PREMISFilePath: fs.NewFile(t, "premis.xml",
fs.WithContent(premisInvalidFormatsContent),
).Path(),
Expand All @@ -163,7 +171,10 @@ func TestValidateFileFormats(t *testing.T) {
{
name: "Fails with an invalid content path",
params: activities.ValidateFileFormatsParams{
ContentPath: "/path/to/missing/dir",
SIP: sip.SIP{
Type: enums.SIPTypeDigitizedAIP,
ContentPath: "/path/to/missing/dir",
},
PREMISFilePath: fs.NewFile(t, "premis.xml",
fs.WithContent(premis.EmptyXML),
).Path(),
Expand All @@ -174,9 +185,12 @@ func TestValidateFileFormats(t *testing.T) {
{
name: "Fails with empty source",
params: activities.ValidateFileFormatsParams{
ContentPath: fs.NewDir(t, "",
fs.WithFile("file.txt", ""),
).Path(),
SIP: sip.SIP{
Type: enums.SIPTypeDigitizedAIP,
ContentPath: fs.NewDir(t, "",
fs.WithFile("file.txt", ""),
).Path(),
},
PREMISFilePath: fs.NewFile(t, "premis.xml",
fs.WithContent(premis.EmptyXML),
).Path(),
Expand Down
15 changes: 12 additions & 3 deletions internal/premis/premis.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ import (
"github.com/beevik/etree"
"github.com/google/uuid"

"github.com/artefactual-sdps/preprocessing-sfa/internal/enums"
"github.com/artefactual-sdps/preprocessing-sfa/internal/fsutil"
"github.com/artefactual-sdps/preprocessing-sfa/internal/sip"
)

const EmptyXML = `<?xml version="1.0" encoding="UTF-8"?>
Expand Down Expand Up @@ -419,11 +421,18 @@ func FilesWithinDirectory(contentPath string) ([]string, error) {
return subpaths, nil
}

func OriginalNameForSubpath(contentPath, subpath string) string {
transferDirName := filepath.Base(filepath.Dir(filepath.Dir(contentPath)))
func OriginalNameForSubpath(sip sip.SIP, subpath string) string {
// Handle born digital SIP structure separately.
var transferDirName string

if sip.Type == enums.SIPTypeBornDigital {
transferDirName = filepath.Base(filepath.Dir(sip.ContentPath))
} else {
transferDirName = filepath.Base(filepath.Dir(filepath.Dir(sip.ContentPath)))
}

// Handle one file differently (as it gets renamed later in TransformSIP).
if filepath.Base(subpath) == "Prozess_Digitalisierung_PREMIS.xml" {
// This file later gets renamed in TransformSIP.
parentDirName := filepath.Base(filepath.Dir(subpath))
filename := fmt.Sprintf("Prozess_Digitalisierung_PREMIS_%s.xml", parentDirName)
return filepath.Join("data", "metadata", filename)
Expand Down
41 changes: 33 additions & 8 deletions internal/premis/premis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ import (
"gotest.tools/v3/assert"
"gotest.tools/v3/fs"

"github.com/artefactual-sdps/preprocessing-sfa/internal/enums"
"github.com/artefactual-sdps/preprocessing-sfa/internal/premis"
"github.com/artefactual-sdps/preprocessing-sfa/internal/sip"
)

const premisObjectAddContent = `<?xml version="1.0" encoding="UTF-8"?>
Expand Down Expand Up @@ -219,22 +221,41 @@ func TestFilesWithinDirectory(t *testing.T) {
}

func TestOriginalNameForSubpath(t *testing.T) {
// Check for correct adjustment of file paths in PREMIS.
originalName := premis.OriginalNameForSubpath(
"test_transfer/content/content",
// Check for correct adjustment of AIP file path in PREMIS.
aipSIP := sip.SIP{
Type: enums.SIPTypeDigitizedAIP,
ContentPath: "test_transfer/content/content",
}

aipOriginalName := premis.OriginalNameForSubpath(
aipSIP,
"d_0000001/00000001.jp2",
)

assert.Equal(t, aipOriginalName,
"data/objects/test_transfer/content/d_0000001/00000001.jp2")

// Check for correct adjustment of born digital SIP file path in PREMIS.
sipSIP := sip.SIP{
Type: enums.SIPTypeBornDigital,
ContentPath: "test_transfer/content",
}

sipOriginalName := premis.OriginalNameForSubpath(
sipSIP,
"d_0000001/00000001.jp2",
)

assert.Equal(t, originalName,
assert.Equal(t, sipOriginalName,
"data/objects/test_transfer/content/d_0000001/00000001.jp2")

// Check for special handling of this specific file's path in PREMIS.
originalName = premis.OriginalNameForSubpath(
"test_transfer/content/content",
metadataOriginalName := premis.OriginalNameForSubpath(
aipSIP,
"content/content/d_0000001/Prozess_Digitalisierung_PREMIS.xml",
)

assert.Equal(t, originalName,
assert.Equal(t, metadataOriginalName,
"data/metadata/Prozess_Digitalisierung_PREMIS_d_0000001.xml")
}

Expand All @@ -257,7 +278,11 @@ func TestAppendPREMISEventAndLinkToObject(t *testing.T) {
err = doc.ReadFromString(premisObjectAddContent)
assert.NilError(t, err)

originalName := premis.OriginalNameForSubpath("test_transfer/content/content", "cat.jpg")
aipSIP := sip.SIP{
Type: enums.SIPTypeDigitizedAIP,
ContentPath: "test_transfer/content/content",
}
originalName := premis.OriginalNameForSubpath(aipSIP, "cat.jpg")

err = premis.AppendEventXMLForSingleObject(doc, eventSummary, premis.AgentDefault(), originalName)
assert.NilError(t, err)
Expand Down
4 changes: 2 additions & 2 deletions internal/workflow/preprocessing.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,8 @@ func (w *PreprocessingWorkflow) Execute(
withLocalActOpts(ctx),
activities.AddPREMISObjectsName,
&activities.AddPREMISObjectsParams{
SIP: identifySIP.SIP,
PREMISFilePath: premisFilePath,
ContentPath: identifySIP.SIP.ContentPath,
},
).Get(ctx, &addPREMISObjects)
if e != nil {
Expand Down Expand Up @@ -210,7 +210,7 @@ func (w *PreprocessingWorkflow) Execute(
withLocalActOpts(ctx),
activities.ValidateFileFormatsName,
&activities.ValidateFileFormatsParams{
ContentPath: identifySIP.SIP.ContentPath,
SIP: identifySIP.SIP,
PREMISFilePath: premisFilePath,
Agent: premis.AgentDefault(),
},
Expand Down
6 changes: 3 additions & 3 deletions internal/workflow/preprocessing_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,8 @@ func (s *PreprocessingTestSuite) TestPreprocessingWorkflowSuccess() {
activities.AddPREMISObjectsName,
sessionCtx,
&activities.AddPREMISObjectsParams{
SIP: expectedSIP,
PREMISFilePath: premisFilePath,
ContentPath: expectedSIP.ContentPath,
},
).Return(
&activities.AddPREMISObjectsResult{}, nil,
Expand All @@ -267,7 +267,7 @@ func (s *PreprocessingTestSuite) TestPreprocessingWorkflowSuccess() {
activities.ValidateFileFormatsName,
sessionCtx,
&activities.ValidateFileFormatsParams{
ContentPath: expectedSIP.ContentPath,
SIP: expectedSIP,
PREMISFilePath: premisFilePath,
Agent: premis.AgentDefault(),
},
Expand Down Expand Up @@ -478,7 +478,7 @@ func (s *PreprocessingTestSuite) TestPreprocessingWorkflowValidationFails() {
activities.ValidateFileFormatsName,
sessionCtx,
&activities.ValidateFileFormatsParams{
ContentPath: expectedSIP.ContentPath,
SIP: expectedSIP,
PREMISFilePath: premisFilePath,
Agent: premis.AgentDefault(),
},
Expand Down

0 comments on commit 2eb4b55

Please sign in to comment.