Skip to content

Commit

Permalink
Send PIPs to Archivematica as BagIt bags
Browse files Browse the repository at this point in the history
Fixes #805

- Change the package type to "zipped bag" when starting a transfer via
  the Archivematica API
- Bag the PIP before sending it to Archivematica (if it's not already a
  bag)
- Add a "TransferSourcePath" config value to specify the API path to the
  Transfer Source directory where PIPs are uploaded
  • Loading branch information
djjuhasz committed Aug 22, 2024
1 parent 1056c5f commit 129c30c
Show file tree
Hide file tree
Showing 12 changed files with 257 additions and 90 deletions.
4 changes: 4 additions & 0 deletions cmd/enduro-am-worker/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,10 @@ func main() {
activities.NewBundleActivity(logger).Execute,
temporalsdk_activity.RegisterOptions{Name: activities.BundleActivityName},
)
w.RegisterActivityWithOptions(
bagit_activity.NewCreateBagActivity(cfg.BagIt).Execute,
temporalsdk_activity.RegisterOptions{Name: bagit_activity.CreateBagActivityName},
)
w.RegisterActivityWithOptions(
activities.NewZipActivity(
logger,
Expand Down
37 changes: 19 additions & 18 deletions docs/src/dev-manual/archivematica.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,44 +24,44 @@ inside the cluster, and they are not tracked in the repository.

- Location: `hack/kube/overlays/dev-am/.am.secret`
- **Contents to check:**
- AM API address (e.g.,`http://host.k3d.internal:62080`)
- User credentials (`user=test`, `api_key=test`)
- SFTP configuration
details (`sftp_host=`, `sftp_port=`, `sftp_user=`, `sftp_remote_dir=`,
`sftp_private_key_passphrase=`).
- Archivematica Storage Service location details (`amss_url=`,
`amss_user=`, `amss_api_key=`, `amss_location_id=`). These details are
used by the *mysql-create-amss-location-job.yaml* job to add an AMSS
location to the *enduro_storage* database, and allow AIP download from the
Enduro Dashboard.
- AM API address (e.g., `http://host.k3d.internal:62080`)
- User credentials (`user=test`, `api_key=test`)
- SFTP configuration
details (`sftp_host=`, `sftp_port=`, `sftp_user=`, `sftp_remote_dir=`,
`sftp_private_key_passphrase=`).
- Archivematica Storage Service location details (`amss_url=`,
`amss_user=`, `amss_api_key=`, `amss_location_id=`). These details are
used by the _mysql-create-amss-location-job.yaml_ job to add an AMSS
location to the _enduro_storage_ database, and allow AIP download from the
Enduro Dashboard.

#### `.id_ed25519.secret`

- Location: `hack/kube/overlays/dev-am/.id_ed25519.secret`
- **Contents to check:**
- SSH private key (Ensure it starts with `-----BEGIN
OPENSSH PRIVATE KEY-----` and ends with `-----END
OPENSSH PRIVATE KEY-----`)
- SSH private key (Ensure it starts with `-----BEGIN
OPENSSH PRIVATE KEY-----` and ends with `-----END
OPENSSH PRIVATE KEY-----`)

#### `.known_hosts.secret`

- Location: `hack/kube/overlays/dev-am/.known_hosts.secret`
- **Contents to check:**
- Known hosts entries (Look for entries starting with
`|1|` and containing `ssh-rsa`, `ecdsa-sha2-nistp256`,
`ssh-ed25519` etc.)
- Known hosts entries (Look for entries starting with
`|1|` and containing `ssh-rsa`, `ecdsa-sha2-nistp256`,
`ssh-ed25519` etc.)

#### `.tilt.env`

- Location: `root/`
- **Contents to check:**
- `ENDURO_PRES_SYSTEM = "am"`
- `ENDURO_PRES_SYSTEM = "am"`

#### `enduro.toml`

- Location: `root/`
- **Contents to check:**
- `[preservation] taskQueue` variable must be set to "am"
- `[preservation] taskQueue` variable must be set to "am"

!!! note

Expand All @@ -76,6 +76,7 @@ AM API and SFTP configuration:
address=http://host.k3d.internal:62080
user=test
api_key=test
transfer_source_path=4f7c29ff-ecdf-4acc-a426-2b5441457759:enduro-source
sftp_host=host.k3d.internal
sftp_port=2222
sftp_user=archivematica
Expand Down
24 changes: 20 additions & 4 deletions enduro.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ corsOrigin = "http://localhost"

[api.auth]
# Enable API authentication. OIDC is the only protocol supported at the
# moment. When enabled the API verifies the access token submitted with
# moment. When enabled the API verifies the access token submitted with
# each request. The API client is responsible for obtaining an access
# token from the provider.
enabled = true
Expand All @@ -29,7 +29,7 @@ enabled = true
# OIDC provider URL. Required when auth. is enabled.
providerURL = "http://keycloak:7470/realms/artefactual"
# OIDC client ID. The client ID must be included in the `aud` claim of
# the access token. Required when auth. is enabled.
# the access token. Required when auth. is enabled.
clientID = "enduro"

[api.auth.oidc.abac]
Expand All @@ -39,7 +39,7 @@ clientID = "enduro"
enabled = true
# Claim path of the Enduro attributes within the access token. If the claim
# path is nested then include all fields separated by `claimPathSeparator`
# (see below). E.g. "attributes.enduro" with `claimPathSeparator = "."`.
# (see below). E.g. "attributes.enduro" with `claimPathSeparator = "."`.
# Required when ABAC is enabled.
claimPath = "enduro"
# Separator used to split the claim path fields. The default value of "" will
Expand Down Expand Up @@ -152,12 +152,28 @@ pollInterval = "10s"
# no time limit.
transferDeadline = "1h"

# TransferSourcePath is the path to an Archivematica transfer source directory.
# It is used in the POST /api/v2beta/package "path" parameter to start a
# transfer via the API. TransferSourcePath must be prefixed with the UUID of an
# AMSS transfer source directory, optionally followed by a relative path from
# the source dir (e.g. "749ef452-fbed-4d50-9072-5f98bc01e52e:sftp_upload"). If
# no transferSourcePath is specified, the default transfer source path will be
# used.
transferSourcePath = ""

[am.sftp]
host = "" # The Archivematica Storage Service hostname.
port = ""
user = ""

# knownHostsFile is the absolute path to a local SSH "known_hosts" file that
# includes a public host key for the AM SFTP server.
# Default: "/home/[user]/.ssh/known_hosts" (where [user] is your local user).
knownHostsFile = ""
remoteDir = "/transfer_source"

# remoteDir is the directory path, relative to the SFTP root directory, where
# PIPs should be uploaded.
remoteDir = ""

[am.sftp.privateKey]
path = ""
Expand Down
5 changes: 5 additions & 0 deletions hack/kube/overlays/dev-am/enduro-am.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ spec:
secretKeyRef:
name: enduro-am-secret
key: api_key
- name: ENDURO_AM_TRANSFERSOURCEPATH
valueFrom:
secretKeyRef:
name: enduro-am-secret
key: transfer_source_path
- name: ENDURO_AM_SFTP_HOST
valueFrom:
secretKeyRef:
Expand Down
8 changes: 8 additions & 0 deletions internal/am/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ type Config struct {
// SFTP configuration for uploading transfers to Archivematica.
SFTP sftp.Config

// TransferSourcePath is the path to an Archivematica transfer source
// directory. It is used in the POST /api/v2beta/package "path" parameter
// to start a transfer via the API. TransferSourcePath must be prefixed with
// the UUID of an AMSS transfer source directory, optionally followed by a
// relative path from the source dir (e.g.
// "749ef452-fbed-4d50-9072-5f98bc01e52e:sftp_upload").
TransferSourcePath string

// Capacity sets the maximum number of worker sessions the worker can
// handle at one time (default: 1).
Capacity int
Expand Down
17 changes: 13 additions & 4 deletions internal/am/start_transfer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package am

import (
context "context"
"path/filepath"

"github.com/go-logr/logr"
"go.artefactual.dev/amclient"
Expand All @@ -16,8 +17,12 @@ type StartTransferActivity struct {
}

type StartTransferActivityParams struct {
// Name of the transfer.
Name string
Path string

// RelativePath is the PIP path relative to the Archivematica transfer
// source directory.
RelativePath string
}

type StartTransferActivityResult struct {
Expand All @@ -40,7 +45,11 @@ func (a *StartTransferActivity) Execute(
ctx context.Context,
opts *StartTransferActivityParams,
) (*StartTransferActivityResult, error) {
a.logger.V(1).Info("Executing StartTransferActivity", "Name", opts.Name, "Path", opts.Path)
a.logger.V(1).Info(
"Executing StartTransferActivity",
"Name", opts.Name,
"RelativePath", opts.RelativePath,
)

processingConfig := a.cfg.ProcessingConfig
if processingConfig == "" {
Expand All @@ -49,8 +58,8 @@ func (a *StartTransferActivity) Execute(

payload, resp, err := a.amps.Create(ctx, &amclient.PackageCreateRequest{
Name: opts.Name,
Type: "zipfile",
Path: opts.Path,
Type: "zipped bag",
Path: filepath.Join(a.cfg.TransferSourcePath, opts.RelativePath),
ProcessingConfig: processingConfig,
AutoApprove: true,
})
Expand Down
13 changes: 7 additions & 6 deletions internal/am/start_transfer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ func TestStartTransferActivity(t *testing.T) {

transferID := uuid.New().String()
opts := am.StartTransferActivityParams{
Name: "Testing",
Path: "/tmp",
Name: "Testing",
RelativePath: "/tmp",
}

amcrDefault := func(m *amclienttest.MockPackageServiceMockRecorder, st http.Response) {
m.Create(
mockutil.Context(),
&amclient.PackageCreateRequest{
Name: opts.Name,
Type: "zipfile",
Path: opts.Path,
Type: "zipped bag",
Path: opts.RelativePath,
ProcessingConfig: "automated",
AutoApprove: true,
},
Expand All @@ -59,8 +59,8 @@ func TestStartTransferActivity(t *testing.T) {
mockutil.Context(),
&amclient.PackageCreateRequest{
Name: opts.Name,
Type: "zipfile",
Path: opts.Path,
Type: "zipped bag",
Path: opts.RelativePath,
ProcessingConfig: "automated",
AutoApprove: true,
},
Expand Down Expand Up @@ -117,6 +117,7 @@ func TestStartTransferActivity(t *testing.T) {

return
}
assert.NilError(t, err)

var r am.StartTransferActivityResult
err = future.Get(&r)
Expand Down
15 changes: 9 additions & 6 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"time"

"github.com/artefactual-sdps/temporal-activities/archive"
"github.com/artefactual-sdps/temporal-activities/bagit"
"github.com/google/uuid"
"github.com/mitchellh/mapstructure"
"github.com/spf13/viper"
Expand Down Expand Up @@ -40,6 +41,7 @@ type Configuration struct {
AM am.Config
InternalAPI api.Config
API api.Config
BagIt bagit.Config
Database db.Config
Event event.Config
ExtractActivity archive.Config
Expand All @@ -52,13 +54,14 @@ type Configuration struct {
Telemetry telemetry.Config
}

func (c Configuration) Validate() error {
func (c *Configuration) Validate() error {
// TODO: should this validate all the fields in Configuration?
apiAuthErr := c.API.Auth.Validate()
preprocessingErr := c.Preprocessing.Validate()
uploadErr := c.Upload.Validate()

return errors.Join(apiAuthErr, preprocessingErr, uploadErr)
return errors.Join(
c.API.Auth.Validate(),
c.BagIt.Validate(),
c.Preprocessing.Validate(),
c.Upload.Validate(),
)
}

func Read(config *Configuration, configFile string) (found bool, configFileUsed string, err error) {
Expand Down
1 change: 1 addition & 0 deletions internal/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func TestConfig(t *testing.T) {
assert.Equal(t, c.AM.Capacity, 1)
assert.Equal(t, c.AM.PollInterval, 10*time.Second)
assert.Equal(t, c.API.Listen, "127.0.0.1:9000")
assert.Equal(t, c.BagIt.ChecksumAlgorithm, "sha512")
assert.Equal(t, c.DebugListen, "127.0.0.1:9001")
assert.Equal(t, c.Preservation.TaskQueue, temporal.A3mWorkerTaskQueue)
assert.Equal(t, c.Storage.TaskQueue, temporal.GlobalTaskQueue)
Expand Down
3 changes: 2 additions & 1 deletion internal/sftp/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ type Config struct {
// Private key used for authentication.
PrivateKey PrivateKey

// Default directory on SFTP server for file transfers.
// RemoteDir is the directory path, relative to the SFTP root directory,
// where PIPs should be uploaded.
RemoteDir string
}

Expand Down
Loading

0 comments on commit 129c30c

Please sign in to comment.