From 5d924d99ea55c7a2b7fe98c5d3b7324ff6e113a6 Mon Sep 17 00:00:00 2001 From: Mike Cantelon Date: Thu, 14 Nov 2024 09:21:42 -0800 Subject: [PATCH] Fix SIP type identification (#79) Fixed and updated SIP type identification logic and updated related tests. --- internal/activities/identify_sip_test.go | 8 +++- internal/activities/transform_sip.go | 4 +- internal/activities/transform_sip_test.go | 2 + .../activities/validate_structure_test.go | 5 +- .../activities/write_identifier_file_test.go | 2 +- internal/enums/sip_type.go | 3 +- internal/enums/sip_type_enum.go | 12 +++-- internal/pips/pips_test.go | 10 ++-- internal/premis/premis_test.go | 2 +- internal/sip/sip.go | 37 +++++++++----- internal/sip/sip_test.go | 48 ++++++++++++++----- 11 files changed, 95 insertions(+), 38 deletions(-) diff --git a/internal/activities/identify_sip_test.go b/internal/activities/identify_sip_test.go index 1f1c8b06..36a7c6c4 100644 --- a/internal/activities/identify_sip_test.go +++ b/internal/activities/identify_sip_test.go @@ -17,7 +17,13 @@ import ( func TestIdentifySIP(t *testing.T) { t.Parallel() - path := fs.NewDir(t, "", fs.WithDir("content"), fs.WithDir("additional")).Path() + path := fs.NewDir(t, "", + fs.WithDir("content", + fs.WithDir("d_0000001", + fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""), + ), + ), + fs.WithDir("additional")).Path() tests := []struct { name string diff --git a/internal/activities/transform_sip.go b/internal/activities/transform_sip.go index 276d0893..c5ad70e8 100644 --- a/internal/activities/transform_sip.go +++ b/internal/activities/transform_sip.go @@ -38,8 +38,8 @@ func (a *TransformSIP) Execute(ctx context.Context, params *TransformSIPParams) // Move the Prozess_Digitalisierung_PREMIS.xml file to the PIP metadata // directory. Prozess_Digitalisierung_PREMIS.xml is only present in - // digitized SIPs, and there can only be one dossier in a digitized SIP. - if params.SIP.Type == enums.SIPTypeDigitizedSIP { + // digitized SIPs/AIPs, and there can only be one dossier in a digitized SIP/AIP. + if params.SIP.Type == enums.SIPTypeDigitizedSIP || params.SIP.Type == enums.SIPTypeDigitizedAIP { entries, err := os.ReadDir(params.SIP.ContentPath) if err != nil { return nil, err diff --git a/internal/activities/transform_sip_test.go b/internal/activities/transform_sip_test.go index 84c441ad..09bc9408 100644 --- a/internal/activities/transform_sip_test.go +++ b/internal/activities/transform_sip_test.go @@ -32,6 +32,7 @@ func TestTransformSIP(t *testing.T) { fs.WithDir("d_0000001", fs.WithFile("00000001.jp2", ""), fs.WithFile("00000001_PREMIS.xml", ""), + fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""), ), ), fs.WithDir("header", @@ -84,6 +85,7 @@ func TestTransformSIP(t *testing.T) { ), fs.WithDir("metadata", fs.WithMode(dmode), fs.WithFile("UpdatedAreldaMetadata.xml", "", fs.WithMode(fmode)), + fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", "", fs.WithMode(fmode)), ), ) diff --git a/internal/activities/validate_structure_test.go b/internal/activities/validate_structure_test.go index d22a3604..39a126a3 100644 --- a/internal/activities/validate_structure_test.go +++ b/internal/activities/validate_structure_test.go @@ -23,7 +23,9 @@ func TestValidateStructure(t *testing.T) { ), fs.WithDir("content", fs.WithDir("content", - fs.WithDir("d_0000001"), + fs.WithDir("d_0000001", + fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""), + ), ), fs.WithDir("header", fs.WithDir("old", @@ -76,6 +78,7 @@ func TestValidateStructure(t *testing.T) { missingPiecesAIP, err := sip.New(fs.NewDir(t, "", fs.WithDir("additional"), + fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""), ).Path()) assert.NilError(t, err) diff --git a/internal/activities/write_identifier_file_test.go b/internal/activities/write_identifier_file_test.go index 05a66251..db37856c 100644 --- a/internal/activities/write_identifier_file_test.go +++ b/internal/activities/write_identifier_file_test.go @@ -35,7 +35,7 @@ func TestWriteIdentifierFile(t *testing.T) { enums.SIPTypeDigitizedSIP, ) - pipNoManifest := pips.New(fs.NewDir(t, "").Path(), enums.SIPTypeBornDigital) + pipNoManifest := pips.New(fs.NewDir(t, "").Path(), enums.SIPTypeBornDigitalSIP) pipEmptyManifest := pips.New( fs.NewDir(t, "", diff --git a/internal/enums/sip_type.go b/internal/enums/sip_type.go index 4cc8c53e..a19844da 100644 --- a/internal/enums/sip_type.go +++ b/internal/enums/sip_type.go @@ -3,6 +3,7 @@ package enums // ENUM( // DigitizedAIP, // DigitizedSIP, -// BornDigital, +// BornDigitalAIP, +// BornDigitalSIP, // ). type SIPType string diff --git a/internal/enums/sip_type_enum.go b/internal/enums/sip_type_enum.go index d7a67a93..3fe88927 100644 --- a/internal/enums/sip_type_enum.go +++ b/internal/enums/sip_type_enum.go @@ -18,8 +18,10 @@ const ( SIPTypeDigitizedAIP SIPType = "DigitizedAIP" // SIPTypeDigitizedSIP is a SIPType of type DigitizedSIP. SIPTypeDigitizedSIP SIPType = "DigitizedSIP" - // SIPTypeBornDigital is a SIPType of type BornDigital. - SIPTypeBornDigital SIPType = "BornDigital" + // SIPTypeBornDigitalAIP is a SIPType of type BornDigitalAIP. + SIPTypeBornDigitalAIP SIPType = "BornDigitalAIP" + // SIPTypeBornDigitalSIP is a SIPType of type BornDigitalSIP. + SIPTypeBornDigitalSIP SIPType = "BornDigitalSIP" ) var ErrInvalidSIPType = fmt.Errorf("not a valid SIPType, try [%s]", strings.Join(_SIPTypeNames, ", ")) @@ -27,7 +29,8 @@ var ErrInvalidSIPType = fmt.Errorf("not a valid SIPType, try [%s]", strings.Join var _SIPTypeNames = []string{ string(SIPTypeDigitizedAIP), string(SIPTypeDigitizedSIP), - string(SIPTypeBornDigital), + string(SIPTypeBornDigitalAIP), + string(SIPTypeBornDigitalSIP), } // SIPTypeNames returns a list of possible string values of SIPType. @@ -52,7 +55,8 @@ func (x SIPType) IsValid() bool { var _SIPTypeValue = map[string]SIPType{ "DigitizedAIP": SIPTypeDigitizedAIP, "DigitizedSIP": SIPTypeDigitizedSIP, - "BornDigital": SIPTypeBornDigital, + "BornDigitalAIP": SIPTypeBornDigitalAIP, + "BornDigitalSIP": SIPTypeBornDigitalSIP, } // ParseSIPType attempts to convert a string to a SIPType. diff --git a/internal/pips/pips_test.go b/internal/pips/pips_test.go index e14d3a0a..5cfd1149 100644 --- a/internal/pips/pips_test.go +++ b/internal/pips/pips_test.go @@ -61,13 +61,13 @@ func TestNewFromSIP(t *testing.T) { t.Parallel() s := sip.SIP{ - Path: "/path/to/born_digital_AIP_12345", - Type: enums.SIPTypeBornDigital, + Path: "/path/to/born_digital_SIP_12345", + Type: enums.SIPTypeBornDigitalSIP, } assert.DeepEqual(t, pips.NewFromSIP(s), pips.PIP{ - Path: "/path/to/born_digital_AIP_12345", - Type: enums.SIPTypeBornDigital, - ManifestPath: "/path/to/born_digital_AIP_12345/objects/born_digital_AIP_12345/header/metadata.xml", + Path: "/path/to/born_digital_SIP_12345", + Type: enums.SIPTypeBornDigitalSIP, + ManifestPath: "/path/to/born_digital_SIP_12345/objects/born_digital_SIP_12345/header/metadata.xml", }) } diff --git a/internal/premis/premis_test.go b/internal/premis/premis_test.go index 50651f7f..d4c875c3 100644 --- a/internal/premis/premis_test.go +++ b/internal/premis/premis_test.go @@ -328,7 +328,7 @@ func TestOriginalNameForSubpath(t *testing.T) { // Check for correct adjustment of born digital SIP file path in PREMIS. bornDigitalSIP := sip.SIP{ - Type: enums.SIPTypeBornDigital, + Type: enums.SIPTypeBornDigitalSIP, Path: "test_transfer", ContentPath: "test_transfer/content", } diff --git a/internal/sip/sip.go b/internal/sip/sip.go index f8ad48da..94dba5fa 100644 --- a/internal/sip/sip.go +++ b/internal/sip/sip.go @@ -46,19 +46,27 @@ func New(path string) (SIP, error) { } s.Path = path - if fsutil.FileExists(filepath.Join(s.Path, "additional")) { - return s.digitizedAIP(), nil - } - f, err := fsutil.FindFilename(s.Path, "Prozess_Digitalisierung_PREMIS.xml") if err != nil { return s, fmt.Errorf("SIP: New: %v", err) } - if len(f) > 0 { - return s.digitizedSIP(), nil + hasProzessFile := len(f) > 0 + + hasAdditionalDir := fsutil.FileExists(filepath.Join(s.Path, "additional")) + + if hasProzessFile { + if hasAdditionalDir { + return s.digitizedAIP(), nil + } else { + return s.digitizedSIP(), nil + } + } else { + if hasAdditionalDir { + return s.bornDigitalAIP(), nil + } else { + return s.bornDigitalSIP(), nil + } } - - return s.bornDigital(), nil } func (s SIP) digitizedAIP() SIP { @@ -77,14 +85,21 @@ func (s SIP) digitizedAIP() SIP { } func (s SIP) digitizedSIP() SIP { - s = s.bornDigital() + s = s.bornDigitalSIP() s.Type = enums.SIPTypeDigitizedSIP return s } -func (s SIP) bornDigital() SIP { - s.Type = enums.SIPTypeBornDigital +func (s SIP) bornDigitalAIP() SIP { + s = s.bornDigitalSIP() + s.Type = enums.SIPTypeBornDigitalAIP + + return s +} + +func (s SIP) bornDigitalSIP() SIP { + s.Type = enums.SIPTypeBornDigitalSIP s.ContentPath = filepath.Join(s.Path, "content") s.MetadataPath = filepath.Join(s.Path, "header", "metadata.xml") s.ManifestPath = s.MetadataPath diff --git a/internal/sip/sip_test.go b/internal/sip/sip_test.go index ad614485..b7f42e23 100644 --- a/internal/sip/sip_test.go +++ b/internal/sip/sip_test.go @@ -14,7 +14,11 @@ func TestNew(t *testing.T) { t.Parallel() digitizedAIP := fs.NewDir(t, "Test-AIP-Digitization", - fs.WithDir("content"), + fs.WithDir("content", + fs.WithDir("d_0000001", + fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", ""), + ), + ), fs.WithDir("additional"), ) @@ -36,7 +40,13 @@ func TestNew(t *testing.T) { fs.WithDir("header"), ) - bornDigital := fs.NewDir(t, "", + bornDigitalAIP := fs.NewDir(t, "", + fs.WithDir("additional"), + fs.WithDir("content"), + fs.WithDir("header"), + ) + + bornDigitalSIP := fs.NewDir(t, "", fs.WithDir("content"), fs.WithDir("header"), ) @@ -96,19 +106,35 @@ func TestNew(t *testing.T) { }, }, }, + { + name: "Creates a new born digital AIP", + path: bornDigitalAIP.Path(), + wantSIP: sip.SIP{ + Type: enums.SIPTypeBornDigitalAIP, + Path: bornDigitalAIP.Path(), + ContentPath: bornDigitalAIP.Join("content"), + ManifestPath: bornDigitalAIP.Join("header", "metadata.xml"), + MetadataPath: bornDigitalAIP.Join("header", "metadata.xml"), + XSDPath: bornDigitalAIP.Join("header", "xsd", "arelda.xsd"), + TopLevelPaths: []string{ + bornDigitalAIP.Join("content"), + bornDigitalAIP.Join("header"), + }, + }, + }, { name: "Creates a new born digital SIP", - path: bornDigital.Path(), + path: bornDigitalSIP.Path(), wantSIP: sip.SIP{ - Type: enums.SIPTypeBornDigital, - Path: bornDigital.Path(), - ContentPath: bornDigital.Join("content"), - ManifestPath: bornDigital.Join("header", "metadata.xml"), - MetadataPath: bornDigital.Join("header", "metadata.xml"), - XSDPath: bornDigital.Join("header", "xsd", "arelda.xsd"), + Type: enums.SIPTypeBornDigitalSIP, + Path: bornDigitalSIP.Path(), + ContentPath: bornDigitalSIP.Join("content"), + ManifestPath: bornDigitalSIP.Join("header", "metadata.xml"), + MetadataPath: bornDigitalSIP.Join("header", "metadata.xml"), + XSDPath: bornDigitalSIP.Join("header", "xsd", "arelda.xsd"), TopLevelPaths: []string{ - bornDigital.Join("content"), - bornDigital.Join("header"), + bornDigitalSIP.Join("content"), + bornDigitalSIP.Join("header"), }, }, },