Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't rename Prozess_Digitalisierung_PREMIS.xml #67

Merged
merged 1 commit into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion internal/activities/add_premis_objects_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ func TestAddPREMISObjects(t *testing.T) {
</premis:formatDesignation>
</premis:format>
</premis:objectCharacteristics>
<premis:originalName>data/metadata/Prozess_Digitalisierung_PREMIS_d_0000001.xml</premis:originalName>
<premis:originalName>data/metadata/Prozess_Digitalisierung_PREMIS.xml</premis:originalName>
</premis:object>
</premis:premis>
`)
Expand Down
42 changes: 20 additions & 22 deletions internal/activities/transform_sip.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"

Expand Down Expand Up @@ -38,31 +36,31 @@
return nil, err
}

// Move Prozess_Digitalisierung_PREMIS.xml files to the metadata directory.
err := filepath.WalkDir(params.SIP.ContentPath, func(p string, d fs.DirEntry, err error) error {
// Move the Prozess_Digitalisierung_PREMIS.xml file to the PIP metadata
// directory. Prozess_Digitalisierung_PREMIS.xml is only present in
// digitized SIPs, and there can only be one dossier in a digitized SIP.
if params.SIP.Type == enums.SIPTypeDigitizedSIP {
entries, err := os.ReadDir(params.SIP.ContentPath)
djjuhasz marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return err
return nil, err

Check warning on line 45 in internal/activities/transform_sip.go

View check run for this annotation

Codecov / codecov/patch

internal/activities/transform_sip.go#L45

Added line #L45 was not covered by tests
}
if d.Name() == "Prozess_Digitalisierung_PREMIS.xml" {
// Adding the parent dir to the filename reduces the likelihood of
// filename conflicts.
dir := filepath.Base(filepath.Dir(p))
dest := filepath.Join(mdPath, fmt.Sprintf("Prozess_Digitalisierung_PREMIS_%s.xml", dir))
err := fsutil.Move(p, dest)
if err != nil {
return err
}

p := filepath.Join(
params.SIP.ContentPath,
entries[0].Name(), // dossier name.
"Prozess_Digitalisierung_PREMIS.xml",
)

err = fsutil.Move(p, filepath.Join(mdPath, "Prozess_Digitalisierung_PREMIS.xml"))
if err != nil {
return nil, err

Check warning on line 56 in internal/activities/transform_sip.go

View check run for this annotation

Codecov / codecov/patch

internal/activities/transform_sip.go#L56

Added line #L56 was not covered by tests
}
return nil
})
if err != nil {
return nil, err
}

// Move UpdatedAreldaMetadata.xml to the metadata directory (Digitized AIP
// only).
if params.SIP.Type == enums.SIPTypeDigitizedAIP {
err = fsutil.Move(
err := fsutil.Move(
params.SIP.UpdatedAreldaMDPath,
filepath.Join(mdPath, filepath.Base(params.SIP.UpdatedAreldaMDPath)),
)
Expand All @@ -72,13 +70,13 @@
}

// Create objects and [sip-name] sub-directories.
objectsPath := filepath.Join(params.SIP.Path, "objects", filepath.Base(params.SIP.Path))
if err = os.MkdirAll(objectsPath, 0o700); err != nil {
objectsPath := filepath.Join(params.SIP.Path, "objects", params.SIP.Name())
if err := os.MkdirAll(objectsPath, 0o700); err != nil {
return nil, err
}

// Move the content directory into the objects directory.
err = fsutil.Move(params.SIP.ContentPath, filepath.Join(objectsPath, "content"))
err := fsutil.Move(params.SIP.ContentPath, filepath.Join(objectsPath, "content"))
if err != nil {
return nil, err
}
Expand Down
21 changes: 11 additions & 10 deletions internal/activities/transform_sip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ func TestTransformSIP(t *testing.T) {
fmode = os.FileMode(0o600)
)

digitizedAIPPath := fs.NewDir(t, "",
digitizedAIPPath := fs.NewDir(t, "Vecteur_Digitized_AIP",
fs.WithDir("additional",
fs.WithFile("UpdatedAreldaMetadata.xml", ""),
),
Expand All @@ -32,7 +32,6 @@ func TestTransformSIP(t *testing.T) {
fs.WithDir("d_0000001",
fs.WithFile("00000001.jp2", ""),
fs.WithFile("00000001_PREMIS.xml", ""),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
),
fs.WithDir("header",
Expand All @@ -48,7 +47,7 @@ func TestTransformSIP(t *testing.T) {
),
).Path()

digitizedSIPPath := fs.NewDir(t, "Test_Digitized_SIP",
digitizedSIPPath := fs.NewDir(t, "Vecteur_Digitized_SIP",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("00000001.jp2", ""),
Expand Down Expand Up @@ -84,7 +83,6 @@ func TestTransformSIP(t *testing.T) {
),
),
fs.WithDir("metadata", fs.WithMode(dmode),
fs.WithFile("Prozess_Digitalisierung_PREMIS_d_0000001.xml", "", fs.WithMode(fmode)),
fs.WithFile("UpdatedAreldaMetadata.xml", "", fs.WithMode(fmode)),
),
)
Expand All @@ -104,11 +102,11 @@ func TestTransformSIP(t *testing.T) {
),
),
fs.WithDir("metadata", fs.WithMode(dmode),
fs.WithFile("Prozess_Digitalisierung_PREMIS_d_0000001.xml", "", fs.WithMode(fmode)),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", "", fs.WithMode(fmode)),
),
)

missingMetadataSIP, err := sip.New(fs.NewDir(t, "",
missingMetadataSIP, err := sip.New(fs.NewDir(t, "MissingMD_Vecteur_SIP",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("00000001.jp2", ""),
Expand All @@ -118,7 +116,8 @@ func TestTransformSIP(t *testing.T) {
),
).Path())
assert.NilError(t, err)
missingContentSIP, err := sip.New(fs.NewDir(t, "",

missingContentSIP, err := sip.New(fs.NewDir(t, "Missing_Content_SIP",
fs.WithDir("header",
fs.WithFile("metadata.xml", ""),
),
Expand All @@ -142,7 +141,7 @@ func TestTransformSIP(t *testing.T) {
wantSIP: expectedDigitizedSIP,
},
{
name: "Fails with a SIP missing the metadata file",
name: "Fails when the metadata file is missing",
params: activities.TransformSIPParams{SIP: missingMetadataSIP},
wantErr: fmt.Sprintf(
"rename %s/header/metadata.xml %s/objects/%s/header/metadata.xml: no such file or directory",
Expand All @@ -152,11 +151,13 @@ func TestTransformSIP(t *testing.T) {
),
},
{
name: "Fails with a SIP missing the content directory",
name: "Fails when the content directory is missing",
params: activities.TransformSIPParams{SIP: missingContentSIP},
wantErr: fmt.Sprintf(
"lstat %s/content: no such file or directory",
"rename %s/content %s/objects/%s/content: no such file or directory (type: LinkError, retryable: true): no such file or directory",
missingContentSIP.Path,
missingContentSIP.Path,
filepath.Base(missingContentSIP.Path),
),
},
}
Expand Down
12 changes: 12 additions & 0 deletions internal/activities/validate_structure.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,18 @@
failures = append(failures, extras...)
}

// Check that digitized SIPs only have one dossier in the content dir.
if params.SIP.Type == enums.SIPTypeDigitizedSIP {
entries, err := os.ReadDir(params.SIP.ContentPath)
if err != nil {
return nil, fmt.Errorf("ValidateStructure: check for unexpected dossiers: %v", err)
}

Check warning on line 82 in internal/activities/validate_structure.go

View check run for this annotation

Codecov / codecov/patch

internal/activities/validate_structure.go#L81-L82

Added lines #L81 - L82 were not covered by tests

if len(entries) > 1 {
failures = append(failures, "More than one dossier in the content directory")
}
}

return &ValidateStructureResult{Failures: failures}, nil
}

Expand Down
40 changes: 39 additions & 1 deletion internal/activities/validate_structure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,11 @@ func TestValidateStructure(t *testing.T) {

digitizedSIP, err := sip.New(fs.NewDir(t, "",
fs.WithDir("content",
fs.WithDir("d_0000001"),
fs.WithDir("d_0000001",
fs.WithFile("00000001.jp2", ""),
fs.WithFile("00000001_PREMIS.xml", ""),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
),
fs.WithDir("header",
fs.WithFile("metadata.xml", ""),
Expand Down Expand Up @@ -74,6 +78,33 @@ func TestValidateStructure(t *testing.T) {
).Path())
assert.NilError(t, err)

digitizedSIPExtraDossiers, err := sip.New(fs.NewDir(t, "",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("00000001.jp2", ""),
fs.WithFile("00000001_PREMIS.xml", ""),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
fs.WithDir("d_0000002",
fs.WithFile("00000002.jp2", ""),
fs.WithFile("00000002_PREMIS.xml", ""),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
fs.WithDir("d_0000003",
fs.WithFile("00000003.jp2", ""),
fs.WithFile("00000003_PREMIS.xml", ""),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
),
fs.WithDir("header",
fs.WithFile("metadata.xml", ""),
fs.WithDir("xsd",
fs.WithFile("arelda.xsd", ""),
),
),
).Path())
assert.NilError(t, err)

tests := []struct {
name string
params activities.ValidateStructureParams
Expand Down Expand Up @@ -121,6 +152,13 @@ func TestValidateStructure(t *testing.T) {
},
},
},
{
name: "Returns a failure when a digitized SIP has more than one dossier",
params: activities.ValidateStructureParams{SIP: digitizedSIPExtraDossiers},
want: activities.ValidateStructureResult{
Failures: []string{"More than one dossier in the content directory"},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion internal/activities/write_identifier_file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func TestWriteIdentifierFile(t *testing.T) {
},
wantJSON: `[
{
"file": "metadata/Prozess_Digitalisierung_PREMIS_d_0000001.xml",
"file": "metadata/Prozess_Digitalisierung_PREMIS.xml",
"identifiers": [
{
"identifier": "_cQ6sm5CChWVqtqmrWvne0W",
Expand Down
25 changes: 10 additions & 15 deletions internal/pips/pips.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package pips

import (
"fmt"
"path/filepath"
"strings"

Expand Down Expand Up @@ -42,20 +41,16 @@ func (p PIP) Name() string {
}

func (p PIP) ConvertSIPPath(path string) string {
switch {
case filepath.Base(path) == "Prozess_Digitalisierung_PREMIS.xml":
parent := filepath.Base(filepath.Dir(path))
return filepath.Join(
"metadata",
fmt.Sprintf("Prozess_Digitalisierung_PREMIS_%s.xml", parent),
)
case filepath.Base(path) == "metadata.xml":
return filepath.Join("objects", p.Name(), "header", "metadata.xml")
case filepath.Base(path) == "UpdatedAreldaMetadata.xml":
return filepath.Join("metadata", "UpdatedAreldaMetadata.xml")
case strings.HasPrefix(path, "content"):
switch name := filepath.Base(path); name {
case "Prozess_Digitalisierung_PREMIS.xml", "UpdatedAreldaMetadata.xml":
return filepath.Join("metadata", name)
case "metadata.xml":
return filepath.Join("objects", p.Name(), "header", name)
}

if strings.HasPrefix(path, "content") {
return filepath.Join("objects", p.Name(), path)
default:
return ""
}

return ""
}
2 changes: 1 addition & 1 deletion internal/pips/pips_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func TestConvertSIPPath(t *testing.T) {
p := pips.New("/path/to/SIP_20201201_Vecteur", enums.SIPTypeDigitizedSIP)
assert.Equal(t,
p.ConvertSIPPath("content/d_0000001/Prozess_Digitalisierung_PREMIS.xml"),
"metadata/Prozess_Digitalisierung_PREMIS_d_0000001.xml",
"metadata/Prozess_Digitalisierung_PREMIS.xml",
)
assert.Equal(t,
p.ConvertSIPPath("header/metadata.xml"),
Expand Down
10 changes: 4 additions & 6 deletions internal/premis/premis.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,12 +377,10 @@ func FilesWithinDirectory(contentPath string) ([]string, error) {
}

func OriginalNameForSubpath(sip sip.SIP, subpath string) string {
// Handle one file differently (as it gets renamed latest in TransformSIP).
// Prozess_Digitalisierung_PREMIS.xml is moved to the metadata directory.
if filepath.Base(subpath) == "Prozess_Digitalisierung_PREMIS.xml" {
parentDirName := filepath.Base(filepath.Dir(subpath))
filename := fmt.Sprintf("Prozess_Digitalisierung_PREMIS_%s.xml", parentDirName)
return filepath.Join("data", "metadata", filename)
} else {
return filepath.Join("data", "objects", filepath.Base(sip.Path), "content", subpath)
return filepath.Join("data", "metadata", "Prozess_Digitalisierung_PREMIS.xml")
}

return filepath.Join("data", "objects", sip.Name(), "content", subpath)
}
2 changes: 1 addition & 1 deletion internal/premis/premis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,5 +348,5 @@ func TestOriginalNameForSubpath(t *testing.T) {
)

assert.Equal(t, metadataOriginalName,
"data/metadata/Prozess_Digitalisierung_PREMIS_d_0000001.xml")
"data/metadata/Prozess_Digitalisierung_PREMIS.xml")
}
3 changes: 1 addition & 2 deletions internal/sip/sip.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"fmt"
"os"
"path/filepath"
"strings"

"github.com/artefactual-sdps/preprocessing-sfa/internal/enums"
"github.com/artefactual-sdps/preprocessing-sfa/internal/fsutil"
Expand Down Expand Up @@ -55,7 +54,7 @@ func New(path string) (SIP, error) {
if err != nil {
return s, fmt.Errorf("SIP: New: %v", err)
}
if len(f) > 0 && strings.Contains(strings.ToLower(s.Path), "vecteur") {
if len(f) > 0 {
djjuhasz marked this conversation as resolved.
Show resolved Hide resolved
return s.digitizedSIP(), nil
}

Expand Down
Loading