Skip to content

Commit

Permalink
Don't rename Prozess_Digitalisierung_PREMIS.xml
Browse files Browse the repository at this point in the history
Fixes #58

Digitized (Vecteur) SIPs are the only type of SIP that contains a
Prozess_Digitalisierung_PREMIS.xml file, and they will only ever
contain a single dossier. Because a digitized SIP has only one dossier,
and that dossier contains a single Prozess_Digitalisierung_PREMIS.xml
file, there's no need to rename the file in the PIP to avoid name clashes.

Changes:
- Update the SIP identification logic: a SIP that has a
  Prozess_Digitalisierung_PREMIS.xml file is now always identified as a
  digitized SIP, even if its path does not contain "vecteur"
- Add a SIP validation check to confirm that digitized SIPs only contain
  a single dossier
- Remove the SIP transformation code that renamed the
  Prozess_Digitalisierung_PREMIS.xml file
  • Loading branch information
djjuhasz committed Oct 23, 2024
1 parent effe3eb commit bc5d806
Show file tree
Hide file tree
Showing 11 changed files with 101 additions and 60 deletions.
2 changes: 1 addition & 1 deletion internal/activities/add_premis_objects_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ func TestAddPREMISObjects(t *testing.T) {
</premis:formatDesignation>
</premis:format>
</premis:objectCharacteristics>
<premis:originalName>data/metadata/Prozess_Digitalisierung_PREMIS_d_0000001.xml</premis:originalName>
<premis:originalName>data/metadata/Prozess_Digitalisierung_PREMIS.xml</premis:originalName>
</premis:object>
</premis:premis>
`)
Expand Down
42 changes: 20 additions & 22 deletions internal/activities/transform_sip.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ package activities
import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"

Expand Down Expand Up @@ -38,31 +36,31 @@ func (a *TransformSIP) Execute(ctx context.Context, params *TransformSIPParams)
return nil, err
}

// Move Prozess_Digitalisierung_PREMIS.xml files to the metadata directory.
err := filepath.WalkDir(params.SIP.ContentPath, func(p string, d fs.DirEntry, err error) error {
// Move the Prozess_Digitalisierung_PREMIS.xml file to the PIP metadata
// directory. Prozess_Digitalisierung_PREMIS.xml is only present in
// digitized SIPs, and there can only be one dossier in a digitized SIP.
if params.SIP.Type == enums.SIPTypeDigitizedSIP {
entries, err := os.ReadDir(params.SIP.ContentPath)
if err != nil {
return err
return nil, err

Check warning on line 45 in internal/activities/transform_sip.go

View check run for this annotation

Codecov / codecov/patch

internal/activities/transform_sip.go#L45

Added line #L45 was not covered by tests
}
if d.Name() == "Prozess_Digitalisierung_PREMIS.xml" {
// Adding the parent dir to the filename reduces the likelihood of
// filename conflicts.
dir := filepath.Base(filepath.Dir(p))
dest := filepath.Join(mdPath, fmt.Sprintf("Prozess_Digitalisierung_PREMIS_%s.xml", dir))
err := fsutil.Move(p, dest)
if err != nil {
return err
}

p := filepath.Join(
params.SIP.ContentPath,
entries[0].Name(), // dossier name.
"Prozess_Digitalisierung_PREMIS.xml",
)

err = fsutil.Move(p, filepath.Join(mdPath, "Prozess_Digitalisierung_PREMIS.xml"))
if err != nil {
return nil, err

Check warning on line 56 in internal/activities/transform_sip.go

View check run for this annotation

Codecov / codecov/patch

internal/activities/transform_sip.go#L56

Added line #L56 was not covered by tests
}
return nil
})
if err != nil {
return nil, err
}

// Move UpdatedAreldaMetadata.xml to the metadata directory (Digitized AIP
// only)
if params.SIP.Type == enums.SIPTypeDigitizedAIP {
err = fsutil.Move(
err := fsutil.Move(
params.SIP.UpdatedAreldaMDPath,
filepath.Join(mdPath, filepath.Base(params.SIP.UpdatedAreldaMDPath)),
)
Expand All @@ -72,13 +70,13 @@ func (a *TransformSIP) Execute(ctx context.Context, params *TransformSIPParams)
}

// Create objects and [sip-name] sub-directories.
objectsPath := filepath.Join(params.SIP.Path, "objects", filepath.Base(params.SIP.Path))
if err = os.MkdirAll(objectsPath, 0o700); err != nil {
objectsPath := filepath.Join(params.SIP.Path, "objects", params.SIP.Name())
if err := os.MkdirAll(objectsPath, 0o700); err != nil {
return nil, err
}

// Move the content directory into the objects directory.
err = fsutil.Move(params.SIP.ContentPath, filepath.Join(objectsPath, "content"))
err := fsutil.Move(params.SIP.ContentPath, filepath.Join(objectsPath, "content"))
if err != nil {
return nil, err
}
Expand Down
21 changes: 11 additions & 10 deletions internal/activities/transform_sip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func TestTransformSIP(t *testing.T) {
fmode = os.FileMode(0o600)
)

digitizedAIPPath := fs.NewDir(t, "",
digitizedAIPPath := fs.NewDir(t, "Vecteur_Digitized_AIP",
fs.WithDir("additional",
fs.WithFile("UpdatedAreldaMetadata.xml", ""),
),
Expand All @@ -32,7 +32,6 @@ func TestTransformSIP(t *testing.T) {
fs.WithDir("d_0000001",
fs.WithFile("00000001.jp2", ""),
fs.WithFile("00000001_PREMIS.xml", ""),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
),
fs.WithDir("header",
Expand All @@ -48,7 +47,7 @@ func TestTransformSIP(t *testing.T) {
),
).Path()

digitizedSIPPath := fs.NewDir(t, "Test_Digitized_SIP",
digitizedSIPPath := fs.NewDir(t, "Vecteur_Digitized_SIP",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("00000001.jp2", ""),
Expand Down Expand Up @@ -84,7 +83,6 @@ func TestTransformSIP(t *testing.T) {
),
),
fs.WithDir("metadata", fs.WithMode(dmode),
fs.WithFile("Prozess_Digitalisierung_PREMIS_d_0000001.xml", "", fs.WithMode(fmode)),
fs.WithFile("UpdatedAreldaMetadata.xml", "", fs.WithMode(fmode)),
),
)
Expand All @@ -104,11 +102,11 @@ func TestTransformSIP(t *testing.T) {
),
),
fs.WithDir("metadata", fs.WithMode(dmode),
fs.WithFile("Prozess_Digitalisierung_PREMIS_d_0000001.xml", "", fs.WithMode(fmode)),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", "", fs.WithMode(fmode)),
),
)

missingMetadataSIP, err := sip.New(fs.NewDir(t, "",
missingMetadataSIP, err := sip.New(fs.NewDir(t, "MissingMD_Vecteur_SIP",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("00000001.jp2", ""),
Expand All @@ -118,7 +116,8 @@ func TestTransformSIP(t *testing.T) {
),
).Path())
assert.NilError(t, err)
missingContentSIP, err := sip.New(fs.NewDir(t, "",

missingContentSIP, err := sip.New(fs.NewDir(t, "Missing_Content_SIP",
fs.WithDir("header",
fs.WithFile("metadata.xml", ""),
),
Expand All @@ -142,7 +141,7 @@ func TestTransformSIP(t *testing.T) {
wantSIP: expectedDigitizedSIP,
},
{
name: "Fails with a SIP missing the metadata file",
name: "Fails when the metadata file is missing",
params: activities.TransformSIPParams{SIP: missingMetadataSIP},
wantErr: fmt.Sprintf(
"rename %s/header/metadata.xml %s/objects/%s/header/metadata.xml: no such file or directory",
Expand All @@ -152,11 +151,13 @@ func TestTransformSIP(t *testing.T) {
),
},
{
name: "Fails with a SIP missing the content directory",
name: "Fails when the content directory is missing",
params: activities.TransformSIPParams{SIP: missingContentSIP},
wantErr: fmt.Sprintf(
"lstat %s/content: no such file or directory",
"rename %s/content %s/objects/%s/content: no such file or directory (type: LinkError, retryable: true): no such file or directory",
missingContentSIP.Path,
missingContentSIP.Path,
filepath.Base(missingContentSIP.Path),
),
},
}
Expand Down
12 changes: 12 additions & 0 deletions internal/activities/validate_structure.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@ func (a *ValidateStructure) Execute(
failures = append(failures, extras...)
}

// Check that digitized SIPs only have one dossier in the content dir.
if params.SIP.Type == enums.SIPTypeDigitizedSIP {
entries, err := os.ReadDir(params.SIP.ContentPath)
if err != nil {
return nil, fmt.Errorf("ValidateStructure: check for unexpected dossiers: %v", err)
}

Check warning on line 82 in internal/activities/validate_structure.go

View check run for this annotation

Codecov / codecov/patch

internal/activities/validate_structure.go#L81-L82

Added lines #L81 - L82 were not covered by tests

if len(entries) > 1 {
failures = append(failures, "More than one dossier in the content directory")
}
}

return &ValidateStructureResult{Failures: failures}, nil
}

Expand Down
40 changes: 39 additions & 1 deletion internal/activities/validate_structure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ func TestValidateStructure(t *testing.T) {

digitizedSIP, err := sip.New(fs.NewDir(t, "",
fs.WithDir("content",
fs.WithDir("d_0000001"),
fs.WithDir("d_0000001",
fs.WithFile("00000001.jp2", ""),
fs.WithFile("00000001_PREMIS.xml", ""),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
),
fs.WithDir("header",
fs.WithFile("metadata.xml", ""),
Expand Down Expand Up @@ -74,6 +78,33 @@ func TestValidateStructure(t *testing.T) {
).Path())
assert.NilError(t, err)

digitizedSIPExtraDossiers, err := sip.New(fs.NewDir(t, "",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("00000001.jp2", ""),
fs.WithFile("00000001_PREMIS.xml", ""),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
fs.WithDir("d_0000002",
fs.WithFile("00000002.jp2", ""),
fs.WithFile("00000002_PREMIS.xml", ""),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
fs.WithDir("d_0000003",
fs.WithFile("00000003.jp2", ""),
fs.WithFile("00000003_PREMIS.xml", ""),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
),
fs.WithDir("header",
fs.WithFile("metadata.xml", ""),
fs.WithDir("xsd",
fs.WithFile("arelda.xsd", ""),
),
),
).Path())
assert.NilError(t, err)

tests := []struct {
name string
params activities.ValidateStructureParams
Expand Down Expand Up @@ -121,6 +152,13 @@ func TestValidateStructure(t *testing.T) {
},
},
},
{
name: "Returns a failure when a digitized SIP has more than one dossier",
params: activities.ValidateStructureParams{SIP: digitizedSIPExtraDossiers},
want: activities.ValidateStructureResult{
Failures: []string{"More than one dossier in the content directory"},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion internal/activities/write_identifier_file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func TestWriteIdentifierFile(t *testing.T) {
},
wantJSON: `[
{
"file": "metadata/Prozess_Digitalisierung_PREMIS_d_0000001.xml",
"file": "metadata/Prozess_Digitalisierung_PREMIS.xml",
"identifiers": [
{
"identifier": "_cQ6sm5CChWVqtqmrWvne0W",
Expand Down
25 changes: 10 additions & 15 deletions internal/pips/pips.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package pips

import (
"fmt"
"path/filepath"
"strings"

Expand Down Expand Up @@ -42,20 +41,16 @@ func (p PIP) Name() string {
}

func (p PIP) ConvertSIPPath(path string) string {
switch {
case filepath.Base(path) == "Prozess_Digitalisierung_PREMIS.xml":
parent := filepath.Base(filepath.Dir(path))
return filepath.Join(
"metadata",
fmt.Sprintf("Prozess_Digitalisierung_PREMIS_%s.xml", parent),
)
case filepath.Base(path) == "metadata.xml":
return filepath.Join("objects", p.Name(), "header", "metadata.xml")
case filepath.Base(path) == "UpdatedAreldaMetadata.xml":
return filepath.Join("metadata", "UpdatedAreldaMetadata.xml")
case strings.HasPrefix(path, "content"):
switch name := filepath.Base(path); name {
case "Prozess_Digitalisierung_PREMIS.xml", "UpdatedAreldaMetadata.xml":
return filepath.Join("metadata", name)
case "metadata.xml":
return filepath.Join("objects", p.Name(), "header", name)
}

if strings.HasPrefix(path, "content") {
return filepath.Join("objects", p.Name(), path)
default:
return ""
}

return ""
}
2 changes: 1 addition & 1 deletion internal/pips/pips_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func TestConvertSIPPath(t *testing.T) {
p := pips.New("/path/to/SIP_20201201_Vecteur", enums.SIPTypeDigitizedSIP)
assert.Equal(t,
p.ConvertSIPPath("content/d_0000001/Prozess_Digitalisierung_PREMIS.xml"),
"metadata/Prozess_Digitalisierung_PREMIS_d_0000001.xml",
"metadata/Prozess_Digitalisierung_PREMIS.xml",
)
assert.Equal(t,
p.ConvertSIPPath("header/metadata.xml"),
Expand Down
10 changes: 4 additions & 6 deletions internal/premis/premis.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,12 +377,10 @@ func FilesWithinDirectory(contentPath string) ([]string, error) {
}

func OriginalNameForSubpath(sip sip.SIP, subpath string) string {
// Handle one file differently (as it gets renamed latest in TransformSIP).
// Prozess_Digitalisierung_PREMIS.xml is moved to the metadata directory.
if filepath.Base(subpath) == "Prozess_Digitalisierung_PREMIS.xml" {
parentDirName := filepath.Base(filepath.Dir(subpath))
filename := fmt.Sprintf("Prozess_Digitalisierung_PREMIS_%s.xml", parentDirName)
return filepath.Join("data", "metadata", filename)
} else {
return filepath.Join("data", "objects", filepath.Base(sip.Path), "content", subpath)
return filepath.Join("data", "metadata", "Prozess_Digitalisierung_PREMIS.xml")
}

return filepath.Join("data", "objects", sip.Name(), "content", subpath)
}
2 changes: 1 addition & 1 deletion internal/premis/premis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,5 +348,5 @@ func TestOriginalNameForSubpath(t *testing.T) {
)

assert.Equal(t, metadataOriginalName,
"data/metadata/Prozess_Digitalisierung_PREMIS_d_0000001.xml")
"data/metadata/Prozess_Digitalisierung_PREMIS.xml")
}
3 changes: 1 addition & 2 deletions internal/sip/sip.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

"github.com/artefactual-sdps/preprocessing-sfa/internal/enums"
"github.com/artefactual-sdps/preprocessing-sfa/internal/fsutil"
Expand Down Expand Up @@ -55,7 +54,7 @@ func New(path string) (SIP, error) {
if err != nil {
return s, fmt.Errorf("SIP: New: %v", err)
}
if len(f) > 0 && strings.Contains(strings.ToLower(s.Path), "vecteur") {
if len(f) > 0 {
return s.digitizedSIP(), nil
}

Expand Down

0 comments on commit bc5d806

Please sign in to comment.