diff --git a/go.mod b/go.mod index faf1a92a..46be9247 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,6 @@ require ( github.com/go-logr/logr v1.4.2 github.com/google/uuid v1.6.0 github.com/hashicorp/go-cleanhttp v0.5.2 - github.com/nyudlts/go-bagit v0.3.0-alpha.0.20240515212815-8dab411c23af github.com/oklog/run v1.1.0 github.com/richardlehane/siegfried v1.11.1 github.com/spf13/pflag v1.0.5 @@ -63,6 +62,7 @@ require ( github.com/kluctl/go-embed-python v0.0.0-3.12.3-20240415-1 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/nyudlts/go-bagit v0.3.0-alpha.0.20240515212815-8dab411c23af // indirect github.com/otiai10/copy v1.14.0 // indirect github.com/pborman/uuid v1.2.1 // indirect github.com/pelletier/go-toml/v2 v2.1.0 // indirect diff --git a/internal/activities/bagvalidate/README.md b/internal/activities/bagvalidate/README.md deleted file mode 100644 index fc2d21e8..00000000 --- a/internal/activities/bagvalidate/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# bagvalidate - -Work in progress. diff --git a/internal/activities/bagvalidate/activity.go b/internal/activities/bagvalidate/activity.go deleted file mode 100644 index ccabedbd..00000000 --- a/internal/activities/bagvalidate/activity.go +++ /dev/null @@ -1,57 +0,0 @@ -package bagvalidate - -import ( - "context" - "errors" - - "go.artefactual.dev/tools/temporal" -) - -const Name = "bag-validate" - -type ( - Params struct { - // Path is the full path of the Bag to be validated. - Path string - } - Result struct { - // Valid is true if the Bag is valid. - Valid bool - - // Error is a message indicating why validation failed, and will always be - // empty when Valid is true. - Error string - } - Activity struct { - validator BagValidator - } -) - -func New(validator BagValidator) *Activity { - return &Activity{validator: validator} -} - -// Execute validates the BagIt Bag located at Path. -// -// If validation succeeds Execute returns `&ValidateActivityResult{Valid: true}, -// nil`. -// If validation fails Execute returns `&ValidateActivityResult{Valid: false, -// Error: "message"}, nil`. -// If an application error occurs Execute returns `nil, error("message")` -func (a *Activity) Execute(ctx context.Context, params *Params) (*Result, error) { - logger := temporal.GetLogger(ctx) - logger.V(1).Info("Executing bag-validate activity", "Path", params.Path) - - if err := a.validator.Validate(params.Path); err != nil { - wrappedErr := errors.Unwrap(err) - - // Handle application errors. - if wrappedErr != ErrNotABag && wrappedErr != ErrInvalid { - return nil, err - } - - return &Result{Valid: false, Error: err.Error()}, nil - } - - return &Result{Valid: true}, nil -} diff --git a/internal/activities/bagvalidate/activity_test.go b/internal/activities/bagvalidate/activity_test.go deleted file mode 100644 index e343501d..00000000 --- a/internal/activities/bagvalidate/activity_test.go +++ /dev/null @@ -1,149 +0,0 @@ -package bagvalidate_test - -import ( - "errors" - "fmt" - "io/fs" - "testing" - - temporalsdk_activity "go.temporal.io/sdk/activity" - temporalsdk_testsuite "go.temporal.io/sdk/testsuite" - "gotest.tools/v3/assert" - tfs "gotest.tools/v3/fs" - - "github.com/artefactual-sdps/preprocessing-sfa/internal/activities/bagvalidate" -) - -const ( - dirMode fs.FileMode = 0o700 - fileMode fs.FileMode = 0o600 - sha512manifest string = `946af3bfd3b0b84ea0d99136085dcd66ee7e769371dbcd097ed35fd377116087e25d004afd68dc48e4eb0bcb6a434b04078577b531a7da1452296d1ae98d20b3 data/another.txt -8cbdd4ed5452f7c066509c066d5ea87fc03f30b0c67153624a1bce4d6e14b6709b5e78caf723cdf419d0efad4db96ba1cad3196783c26a7743029459bdd148b0 data/small.txt -` -) - -func validTestBag(t *testing.T) string { - d := tfs.NewDir(t, "temporal-activities-test", - tfs.WithFile( - "bag-info.txt", - `Bag-Software-Agent: bagvalidate.py v1.8.1 -Bagging-Date: 2024-07-04 -Payload-Oxum: 38.2 -`, - tfs.WithMode(fileMode), - ), - tfs.WithFile( - "bagit.txt", - `BagIt-Version: 0.97 -Tag-File-Character-Encoding: UTF-8`, - tfs.MatchAnyFileContent, tfs.WithMode(fileMode), - ), - tfs.WithFile("manifest-sha512.txt", sha512manifest, tfs.WithMode(fileMode)), - tfs.WithFile("tagmanifest-sha512.txt", "", tfs.MatchAnyFileContent, tfs.WithMode(fileMode)), - tfs.WithDir("data", tfs.WithMode(dirMode), - tfs.WithFile("another.txt", "I am another file.\n", tfs.WithMode(fileMode)), - tfs.WithFile("small.txt", "I am a small file.\n", tfs.WithMode(fileMode)), - ), - ) - - return d.Path() -} - -func invalidTestBag(t *testing.T) string { - d := tfs.NewDir(t, "temporal-activities-test", - tfs.WithFile( - "bag-info.txt", - `Bag-Software-Agent: bagvalidate.py v1.8.1 -Bagging-Date: 2024-07-04 -Payload-Oxum: 38.2 -`, - tfs.WithMode(fileMode), - ), - tfs.WithFile( - "bagit.txt", - `BagIt-Version: 0.97 -Tag-File-Character-Encoding: UTF-8`, - tfs.MatchAnyFileContent, tfs.WithMode(fileMode), - ), - tfs.WithFile("manifest-sha512.txt", sha512manifest, tfs.WithMode(fileMode)), - tfs.WithFile("tagmanifest-sha512.txt", "", tfs.MatchAnyFileContent, tfs.WithMode(fileMode)), - tfs.WithDir("data", tfs.WithMode(dirMode), - tfs.WithFile("small.txt", "I am a small file.\n", tfs.WithMode(fileMode)), - ), - ) - - return d.Path() -} - -func TestActivity(t *testing.T) { - t.Parallel() - - type test struct { - name string - params bagvalidate.Params - want bagvalidate.Result - } - for _, tt := range []test{ - { - name: "Validates a bag", - params: bagvalidate.Params{ - Path: validTestBag(t), - }, - want: bagvalidate.Result{ - Valid: true, - }, - }, - { - name: "Returns a validation error", - params: bagvalidate.Params{ - Path: invalidTestBag(t), - }, - want: bagvalidate.Result{ - Valid: false, - Error: "invalid: payload-oxum validation failed. expected 2 files and 38 bytes but found 1 files and 19 bytes", - }, - }, - } { - tt := tt - t.Run(tt.name, func(t *testing.T) { - t.Parallel() - - validator := bagvalidate.NewValidator() - - // Execute activity with test data. - ts := &temporalsdk_testsuite.WorkflowTestSuite{} - env := ts.NewTestActivityEnvironment() - env.RegisterActivityWithOptions( - bagvalidate.New(validator).Execute, - temporalsdk_activity.RegisterOptions{Name: bagvalidate.Name}, - ) - - enc, _ := env.ExecuteActivity(bagvalidate.Name, tt.params) - - // Test activity result. - var result bagvalidate.Result - fmt.Println(result) - _ = enc.Get(&result) - assert.DeepEqual(t, result, tt.want) - }) - } -} - -func TestActivitySystemError(t *testing.T) { - t.Parallel() - - validator := bagvalidate.NewMockValidator().SetErr(errors.New("transporter accident")) - ts := &temporalsdk_testsuite.WorkflowTestSuite{} - env := ts.NewTestActivityEnvironment() - env.RegisterActivityWithOptions( - bagvalidate.New(validator).Execute, - temporalsdk_activity.RegisterOptions{Name: bagvalidate.Name}, - ) - - _, err := env.ExecuteActivity(bagvalidate.Name, bagvalidate.Params{}) - assert.Error( - t, - err, - "activity error (type: bag-validate, scheduledEventID: 0, startedEventID: 0, identity: ): transporter accident", - ) -} diff --git a/internal/activities/bagvalidate/errors.go b/internal/activities/bagvalidate/errors.go deleted file mode 100644 index d5c82402..00000000 --- a/internal/activities/bagvalidate/errors.go +++ /dev/null @@ -1,18 +0,0 @@ -package bagvalidate - -import ( - "errors" - "strings" -) - -var ErrNotABag = errors.New("not a bag") - -var ErrInvalid = errors.New("invalid") - -func RemovePathFromError(path string, err error) string { - // Remove path from validation messages. - message := strings.Replace(err.Error(), path+" is invalid: ", "", 1) - - // Convert to lower case. - return strings.ToLower(message) -} diff --git a/internal/activities/bagvalidate/mock_validator.go b/internal/activities/bagvalidate/mock_validator.go deleted file mode 100644 index 8fca3d12..00000000 --- a/internal/activities/bagvalidate/mock_validator.go +++ /dev/null @@ -1,20 +0,0 @@ -package bagvalidate - -type mockValidator struct { - err error -} - -func NewMockValidator() *mockValidator { - return &mockValidator{} -} - -func (m mockValidator) Validate(_ string) error { - return m.err -} - -func (m *mockValidator) SetErr(e error) *mockValidator { - m.err = e - return m -} - -var _ BagValidator = mockValidator{} diff --git a/internal/activities/bagvalidate/mock_validator_test.go b/internal/activities/bagvalidate/mock_validator_test.go deleted file mode 100644 index 640eab5d..00000000 --- a/internal/activities/bagvalidate/mock_validator_test.go +++ /dev/null @@ -1,33 +0,0 @@ -package bagvalidate_test - -import ( - "errors" - "testing" - - "gotest.tools/v3/assert" - - "github.com/artefactual-sdps/preprocessing-sfa/internal/activities/bagvalidate" -) - -func TestMockValidator(t *testing.T) { - t.Parallel() - t.Run("Return a nil error", func(t *testing.T) { - t.Parallel() - v := bagvalidate.NewMockValidator() - assert.NilError(t, v.Validate("")) - }) - - t.Run("Return an ErrInvalid error", func(t *testing.T) { - t.Parallel() - v := bagvalidate.NewMockValidator().SetErr(bagvalidate.ErrInvalid) - assert.ErrorIs(t, v.Validate(""), bagvalidate.ErrInvalid) - }) - - t.Run("Return a general error", func(t *testing.T) { - t.Parallel() - v := bagvalidate.NewMockValidator().SetErr(errors.New("system error")) - err := v.Validate("") - assert.Assert(t, !errors.Is(err, bagvalidate.ErrInvalid)) - assert.Error(t, err, "system error") - }) -} diff --git a/internal/activities/bagvalidate/validator.go b/internal/activities/bagvalidate/validator.go deleted file mode 100644 index abc62e4c..00000000 --- a/internal/activities/bagvalidate/validator.go +++ /dev/null @@ -1,52 +0,0 @@ -package bagvalidate - -import ( - "fmt" - "os" - "path/filepath" - - gobagit "github.com/nyudlts/go-bagit" -) - -type BagValidator interface { - Validate(path string) error -} - -type noopValidator struct{} - -func (v noopValidator) Validate(path string) error { - return nil -} - -type validator struct{} - -func (v validator) Validate(path string) error { - // Check if path is a bag. - if _, err := os.Stat(filepath.Join(path, "bagit.txt")); err != nil { - // Do nothing if not a bag (bagit.txt doesn't exist). - return fmt.Errorf("%w: %s", ErrNotABag, "bagit.txt not found") - } - - // Validate bag. - b, err := gobagit.GetExistingBag(path) - if err != nil { - return fmt.Errorf("%w: %s", ErrInvalid, RemovePathFromError(path, err)) - } - - err = b.ValidateBag(true, false) - if err != nil { - return fmt.Errorf("%w: %s", ErrInvalid, RemovePathFromError(path, err)) - } - - return nil -} - -func NewNoopValidator() noopValidator { - return noopValidator{} -} - -func NewValidator() validator { - return validator{} -} - -var _ BagValidator = validator{} diff --git a/internal/activities/bagvalidate/validator_test.go b/internal/activities/bagvalidate/validator_test.go deleted file mode 100644 index 5c10d33f..00000000 --- a/internal/activities/bagvalidate/validator_test.go +++ /dev/null @@ -1,94 +0,0 @@ -package bagvalidate_test - -import ( - "testing" - - "gotest.tools/v3/assert" - "gotest.tools/v3/fs" - - "github.com/artefactual-sdps/preprocessing-sfa/internal/activities/bagvalidate" -) - -const ( - textFileTxtCorrect = `This is a Test file -` - - bagInfoTxt = `Bag-Software-Agent: bagit.py v1.8.1 -Bagging-Date: 2021-10-11 -Payload-Oxum: 20.1 -` - - bagitTxt = `BagIt-Version: 0.97 -Tag-File-Character-Encoding: UTF-8 -` - - manifestSha256Txt = `20cd2eb771177035f483363951203be7cd85f176aaa7d124a56eb4c83562a861 data/test-file.txt` - - tagManifestSha256Text = `e91f941be5973ff71f1dccbdd1a32d598881893a7f21be516aca743da38b1689 bagit.txt -c4600f10b98eb9f179781387e7ce80ff89b4a29793be74ccd037b44b0bf27c00 bag-info.txt -4698e56fb06c495df8f928fd3158d274ca070cc066a770ecb5cc364a9ff12edc manifest-sha256.txt` -) - -func TestValidator(t *testing.T) { - t.Parallel() - - tests := []struct { - name string - bagPath string - wantErr string - }{ - { - name: "Validate non-bag", - bagPath: fs.NewDir(t, "", - fs.WithDir("data", - fs.WithFile("test-file.txt", textFileTxtCorrect), - ), - ).Path(), - wantErr: "not a bag: bagit.txt not found", - }, - { - name: "Validate valid bag", - bagPath: fs.NewDir(t, "", - fs.WithDir("data", - fs.WithFile("test-file.txt", textFileTxtCorrect), - ), - fs.WithFile("bag-info.txt", bagInfoTxt), - fs.WithFile("bagit.txt", bagitTxt), - fs.WithFile("manifest-sha256.txt", manifestSha256Txt), - fs.WithFile("tagmanifest-sha256.txt", tagManifestSha256Text), - ).Path(), - }, - { - name: "Validate invalid bag", - bagPath: fs.NewDir(t, "", - fs.WithDir("data", - fs.WithFile("test-file.txt", "This is wrong"), - ), - fs.WithFile("bag-info.txt", bagInfoTxt), - fs.WithFile("bagit.txt", bagitTxt), - fs.WithFile("manifest-sha256.txt", manifestSha256Txt), - fs.WithFile("tagmanifest-sha256.txt", tagManifestSha256Text), - ).Path(), - wantErr: "payload-oxum validation failed. expected 1 files and 20 bytes but found 1 files and 13 bytes", - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - v := bagvalidate.NewValidator() - err := v.Validate(tt.bagPath) - - if tt.wantErr != "" { - if err == nil { - t.Errorf("error is nil, expecting: %q", tt.wantErr) - } - - assert.ErrorContains(t, err, tt.wantErr) - } - }) - } -} - -func TestNoopValidator(t *testing.T) { - v := bagvalidate.NewNoopValidator() - assert.NilError(t, v.Validate("")) -}