Skip to content

Commit

Permalink
Detect invalid characters in file/dir names (#109)
Browse files Browse the repository at this point in the history
Added logic to the validate structure activity so that it returns a
validation error when the name of a file or directory contains
characters that are incompatible with Archivematica.
  • Loading branch information
mcantelon committed Jan 28, 2025
1 parent 8d59bdc commit 067b43b
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 6 deletions.
30 changes: 24 additions & 6 deletions internal/activities/validate_structure.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"os"
"path/filepath"
"slices"
"strings"

"github.com/artefactual-sdps/preprocessing-sfa/internal/enums"
"github.com/artefactual-sdps/preprocessing-sfa/internal/fsutil"
Expand Down Expand Up @@ -35,20 +36,24 @@ func (a *ValidateStructure) Execute(
) (*ValidateStructureResult, error) {
var failures []string

// Check for empty directories.
// Check for empty directories and invalid (Archivematica incompatible) file/directory names.
paths := make(map[string]int)

err := filepath.WalkDir(params.SIP.Path, func(path string, d fs.DirEntry, err error) error {
if err != nil {
return err
}

if path != params.SIP.Path {
relativePath, err := filepath.Rel(params.SIP.Path, path)
if err != nil {
return err
}
relativePath, err := filepath.Rel(params.SIP.Path, path)
if err != nil {
return err
}

if !validateName(d.Name()) {
failures = append(failures, fmt.Sprintf("Name %q contains invalid character", relativePath))
}

if path != params.SIP.Path {
// Initialize this directory's total number of immediate children.
if d.IsDir() {
paths[relativePath] = 0
Expand Down Expand Up @@ -171,3 +176,16 @@ func extraNodes(sipBase, path string, expected []string, matchDir bool) ([]strin

return extras, nil
}

// validateName reports whether name is made up exclusively of allowed
// characters: ASCII letters, ASCII digits, and the punctuation characters
// '-', '_', '.', '(' and ')'.
//
// The name is inspected one byte at a time, so any byte outside the allowed
// set (including every byte of a multi-byte UTF-8 sequence) makes the name
// invalid. An empty name contains no disallowed bytes and is reported valid.
func validateName(name string) bool {
	const allowed = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.()"

	for _, b := range []byte(name) {
		// A negative index means this byte is not in the allowed set.
		if strings.IndexByte(allowed, b) < 0 {
			return false
		}
	}

	return true
}
29 changes: 29 additions & 0 deletions internal/activities/validate_structure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,25 @@ func TestValidateStructure(t *testing.T) {
).Path())
assert.NilError(t, err)

badNamingSIP, err := sip.New(fs.NewDir(t, "",
fs.WithDir("content",
fs.WithDir("d_0000001",
fs.WithFile("content.txt", ""),
),
),
fs.WithDir("header",
fs.WithFile("content!.txt", ""),
fs.WithFile("metadata.xml", ""),
fs.WithDir("xsd",
fs.WithFile("arelda.xsd", ""),
),
fs.WithDir("directory$",
fs.WithFile("data.xml", ""),
),
),
).Path())
assert.NilError(t, err)

tests := []struct {
name string
params activities.ValidateStructureParams
Expand Down Expand Up @@ -180,6 +199,16 @@ func TestValidateStructure(t *testing.T) {
Failures: []string{"More than one dossier in the content directory"},
},
},
{
name: "Returns a failure when the name of files and/or directories in a SIP have invalid characters",
params: activities.ValidateStructureParams{SIP: badNamingSIP},
want: activities.ValidateStructureResult{
Failures: []string{
"Name \"header/content!.txt\" contains invalid character",
"Name \"header/directory$\" contains invalid character",
},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit 067b43b

Please sign in to comment.