Skip to content

Commit

Permalink
Add logical metadata file to AIP structure
Browse files Browse the repository at this point in the history
Refs #98

Add the logical metadata file to the expected SIP structure for the AIP
SIP types. Update activities related to SIP structure: identify SIP,
validate files, validate structure, and verify manifest.
  • Loading branch information
djjuhasz committed Dec 18, 2024
1 parent a682cec commit 3734440
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 86 deletions.
12 changes: 8 additions & 4 deletions internal/activities/identify_sip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,15 @@ func TestIdentifySIP(t *testing.T) {
t.Parallel()

path := fs.NewDir(t, "",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
fs.WithDir("Digitized-AIP",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
),
fs.WithDir("additional"),
),
fs.WithDir("additional")).Path()
).Join("Digitized-AIP")

tests := []struct {
name string
Expand All @@ -39,6 +42,7 @@ func TestIdentifySIP(t *testing.T) {
Type: enums.SIPTypeDigitizedAIP,
Path: path,
ContentPath: filepath.Join(path, "content", "content"),
LogicalMDPath: filepath.Join(path, "additional", "Digitized-AIP-premis.xml"),
ManifestPath: filepath.Join(path, "additional", "UpdatedAreldaMetadata.xml"),
MetadataPath: filepath.Join(path, "content", "header", "old", "SIP", "metadata.xml"),
UpdatedAreldaMDPath: filepath.Join(path, "additional", "UpdatedAreldaMetadata.xml"),
Expand Down
2 changes: 1 addition & 1 deletion internal/activities/validate_files.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func (a *ValidateFiles) identifyFormats(ctx context.Context, sip sip.SIP) (fileF

ff, err := a.identifier.Identify(path)
if err != nil {
logger.Info("format identication failed", "path", path)
logger.Info("format identification failed", "path", path)
} else {
formats[path] = ff
}
Expand Down
31 changes: 24 additions & 7 deletions internal/activities/validate_structure.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,14 @@ func (a *ValidateStructure) Execute(
) (*ValidateStructureResult, error) {
var failures []string

// Check existence of content and XSD folders.
// Check existence of the content directory.
hasContentDir := true
if !fsutil.FileExists(params.SIP.ContentPath) {
failures = append(failures, "Content folder is missing")
hasContentDir = false
}

// Check existence of the XSD directory.
if !fsutil.FileExists(params.SIP.XSDPath) {
failures = append(failures, "XSD folder is missing")
}
Expand All @@ -51,15 +53,20 @@ func (a *ValidateStructure) Execute(
))
}

// Check existence of UpdatedAreldaMetadata file (digitized AIP only).
if params.SIP.Type == enums.SIPTypeDigitizedAIP && !fsutil.FileExists(params.SIP.UpdatedAreldaMDPath) {
// Check existence of UpdatedAreldaMetadata file (AIPs only).
if params.SIP.IsAIP() && !fsutil.FileExists(params.SIP.UpdatedAreldaMDPath) {
failures = append(failures, fmt.Sprintf(
"%s is missing", filepath.Base(params.SIP.UpdatedAreldaMDPath),
))
}

sipBase := params.SIP.Path
// Check existence of logical metadata file (AIPs only).
if params.SIP.IsAIP() && !fsutil.FileExists(params.SIP.LogicalMDPath) {
failures = append(failures, fmt.Sprintf("%s is missing", filepath.Base(params.SIP.LogicalMDPath)))
}

// Check for unexpected top-level directories.
sipBase := params.SIP.Path
extras, err := extraNodes(sipBase, params.SIP.Path, params.SIP.TopLevelPaths, true)
if err != nil {
return nil, fmt.Errorf("ValidateStructure: check for unexpected dirs: %v", err)
Expand All @@ -75,14 +82,24 @@ func (a *ValidateStructure) Execute(
failures = append(failures, extras...)
}

// Check that digitized SIPs only have one dossier in the content dir.
if params.SIP.Type == enums.SIPTypeDigitizedSIP {
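// Check that digitized packages (SIPs and AIPs) have only one dossier in the content dir.
// NOTE(review): in Go, && binds tighter than ||, so the hasContentDir guard below applies only to the digitized AIP case; confirm that is intended.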
if params.SIP.Type == enums.SIPTypeDigitizedSIP || params.SIP.Type == enums.SIPTypeDigitizedAIP && hasContentDir {
entries, err := os.ReadDir(params.SIP.ContentPath)
if err != nil {
return nil, fmt.Errorf("ValidateStructure: check for unexpected dossiers: %v", err)
}

if len(entries) > 1 {
dirs := 0
for _, e := range entries {
if e.IsDir() {
dirs += 1
}
if dirs > 1 {
break
}
}

if dirs > 1 {
failures = append(failures, "More than one dossier in the content directory")
}
}
Expand Down
49 changes: 28 additions & 21 deletions internal/activities/validate_structure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,30 @@ func TestValidateStructure(t *testing.T) {
t.Parallel()

digitizedAIP, err := sip.New(fs.NewDir(t, "",
fs.WithDir("additional",
fs.WithFile("UpdatedAreldaMetadata.xml", ""),
),
fs.WithDir("content",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
fs.WithDir("AIP-1234",
fs.WithDir("additional",
fs.WithFile("UpdatedAreldaMetadata.xml", ""),
fs.WithFile("AIP-1234-premis.xml", ""),
),
fs.WithDir("header",
fs.WithDir("old",
fs.WithDir("SIP",
fs.WithFile("metadata.xml", ""),
fs.WithDir("content",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
),
fs.WithDir("xsd",
fs.WithFile("arelda.xsd", ""),
fs.WithDir("header",
fs.WithDir("old",
fs.WithDir("SIP",
fs.WithFile("metadata.xml", ""),
),
),
fs.WithDir("xsd",
fs.WithFile("arelda.xsd", ""),
),
),
),
),
).Path())
).Join("AIP-1234"))
assert.NilError(t, err)

digitizedSIP, err := sip.New(fs.NewDir(t, "",
Expand Down Expand Up @@ -76,10 +79,14 @@ func TestValidateStructure(t *testing.T) {
missingPiecesSIP, err := sip.New(fs.NewDir(t, "").Path())
assert.NilError(t, err)

missingPiecesAIP, err := sip.New(fs.NewDir(t, "",
fs.WithDir("additional"),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
).Path())
missingPiecesAIP, err := sip.New(
fs.NewDir(t, "",
fs.WithDir("AIP-1234",
fs.WithDir("additional"),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""),
),
).Join("AIP-1234"),
)
assert.NilError(t, err)

digitizedSIPExtraDossiers, err := sip.New(fs.NewDir(t, "",
Expand Down Expand Up @@ -156,6 +163,7 @@ func TestValidateStructure(t *testing.T) {
"XSD folder is missing",
"metadata.xml is missing",
"UpdatedAreldaMetadata.xml is missing",
"AIP-1234-premis.xml is missing",
},
},
},
Expand Down Expand Up @@ -188,11 +196,10 @@ func TestValidateStructure(t *testing.T) {

return
}
assert.NilError(t, err)

var result activities.ValidateStructureResult
_ = enc.Get(&result)

assert.NilError(t, err)
assert.DeepEqual(t, result, tt.want)
})
}
Expand Down
15 changes: 3 additions & 12 deletions internal/activities/verify_manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (

goset "github.com/deckarep/golang-set/v2"

"github.com/artefactual-sdps/preprocessing-sfa/internal/enums"
"github.com/artefactual-sdps/preprocessing-sfa/internal/manifest"
"github.com/artefactual-sdps/preprocessing-sfa/internal/sip"
)
Expand Down Expand Up @@ -87,7 +86,7 @@ func manifestFiles(s sip.SIP) (map[string]*manifest.File, error) {
}

// Prefix "content/" to AIP file paths.
if isAIP(s.Type) {
if s.IsAIP() {
m := make(map[string]*manifest.File, len(files))
for k, v := range files {
m[filepath.Join("content", k)] = v
Expand All @@ -102,7 +101,7 @@ func manifestFiles(s sip.SIP) (map[string]*manifest.File, error) {
// (excluding directory) paths found.
func sipFiles(s sip.SIP) (goset.Set[string], error) {
root := s.Path
if isAIP(s.Type) {
if s.IsAIP() {
root = filepath.Join(s.Path, "content")
}

Expand All @@ -123,7 +122,7 @@ func sipFiles(s sip.SIP) (goset.Set[string], error) {

// SIPs don't include metadata.xml in the manifest, so ignore the file
// here.
if isSIP(s.Type) && p == "header/metadata.xml" {
if s.IsSIP() && p == "header/metadata.xml" {
return nil
}

Expand Down Expand Up @@ -239,11 +238,3 @@ func generateHash(path, alg string) (string, error) {

return hex.EncodeToString(h.Sum(nil)), nil
}

// isAIP reports whether t is one of the two AIP types: born digital AIP or
// digitized AIP.
func isAIP(t enums.SIPType) bool {
	if t == enums.SIPTypeBornDigitalAIP {
		return true
	}
	return t == enums.SIPTypeDigitizedAIP
}

// isSIP reports whether t is one of the two SIP types: born digital SIP or
// digitized SIP.
func isSIP(t enums.SIPType) bool {
	if t == enums.SIPTypeBornDigitalSIP {
		return true
	}
	return t == enums.SIPTypeDigitizedSIP
}
12 changes: 12 additions & 0 deletions internal/sip/sip.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ type SIP struct {
// ContentPath is the filepath of the "content" directory.
ContentPath string

// LogicalMDPath is the filepath of the logical metadata file (AIP only).
LogicalMDPath string

// ManifestPath is the filepath of the SIP manifest —
// "UpdatedAreldaMetadata.xml" for digitized AIPs, "metadata.xml" for all
// other SIP types.
Expand Down Expand Up @@ -71,6 +74,7 @@ func New(path string) (SIP, error) {
func (s SIP) digitizedAIP() SIP {
s.Type = enums.SIPTypeDigitizedAIP
s.ContentPath = filepath.Join(s.Path, "content", "content")
s.LogicalMDPath = filepath.Join(s.Path, "additional", s.Name()+"-premis.xml")
s.MetadataPath = filepath.Join(s.Path, "content", "header", "old", "SIP", "metadata.xml")
s.UpdatedAreldaMDPath = filepath.Join(s.Path, "additional", "UpdatedAreldaMetadata.xml")
s.ManifestPath = s.UpdatedAreldaMDPath
Expand Down Expand Up @@ -114,3 +118,11 @@ func (s SIP) bornDigitalSIP() SIP {
// Name returns the last element of the SIP's Path, as computed by
// filepath.Base.
func (s SIP) Name() string {
	name := filepath.Base(s.Path)
	return name
}

// IsAIP reports whether the SIP's Type is one of the AIP types: born digital
// AIP or digitized AIP.
func (s SIP) IsAIP() bool {
	switch s.Type {
	case enums.SIPTypeBornDigitalAIP, enums.SIPTypeDigitizedAIP:
		return true
	default:
		return false
	}
}

// IsSIP reports whether the SIP's Type is one of the SIP types: born digital
// SIP or digitized SIP.
func (s SIP) IsSIP() bool {
	switch s.Type {
	case enums.SIPTypeBornDigitalSIP, enums.SIPTypeDigitizedSIP:
		return true
	default:
		return false
	}
}
Loading

0 comments on commit 3734440

Please sign in to comment.