Skip to content

Commit

Permalink
Add persistence package
Browse files Browse the repository at this point in the history
Create a persistence package for database integration using Ent, with
support for SQLite and MySQL databases. Define database schema with
a single `sip` table with `name` and `checksum` columns that will be
used to check for duplicates at the bigginging of the preprocessing
workflow. The persistence layer will only be used when `checkDuplicates`
is set to `true` in the configuration.

Update the dev env/overlay to use an SQLite database and the Enduro
env/overlay to use MySQL. Enable `cgo` at build time and install `gcc`
and `musl-dev` in the build Docker image to be able to build an static
binary that uses `github.com/mattn/go-sqlite3`.

[skip-codecov]
  • Loading branch information
jraddaoui committed Jan 14, 2025
1 parent 577f82f commit a1e1c51
Show file tree
Hide file tree
Showing 44 changed files with 4,044 additions and 48 deletions.
5 changes: 3 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
ARG GO_VERSION=1.23.2

FROM golang:${GO_VERSION}-alpine AS build-go
RUN apk update && apk add --no-cache gcc musl-dev
WORKDIR /src
ENV CGO_ENABLED=0
ENV CGO_ENABLED=1
COPY --link go.* ./
RUN --mount=type=cache,target=/go/pkg/mod go mod download
COPY --link . .
Expand All @@ -18,7 +19,7 @@ RUN --mount=type=cache,target=/go/pkg/mod \
--mount=type=cache,target=/root/.cache/go-build \
go build \
-trimpath \
-ldflags="-X '${VERSION_PATH}.Long=${VERSION_LONG}' -X '${VERSION_PATH}.Short=${VERSION_SHORT}' -X '${VERSION_PATH}.GitCommit=${VERSION_GIT_HASH}'" \
-ldflags="-extldflags '-static' -X '${VERSION_PATH}.Long=${VERSION_LONG}' -X '${VERSION_PATH}.Short=${VERSION_SHORT}' -X '${VERSION_PATH}.GitCommit=${VERSION_GIT_HASH}'" \
-o /out/preprocessing-worker \
./cmd/worker

Expand Down
15 changes: 13 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ else
endif

include hack/make/bootstrap.mk
include hack/make/dep_ent.mk
include hack/make/dep_go_enum.mk
include hack/make/dep_golangci_lint.mk
include hack/make/dep_golines.mk
Expand All @@ -27,7 +28,8 @@ include hack/make/dep_tparse.mk
include hack/make/enums.mk

# Lazy-evaluated list of tools.
TOOLS = $(GOLANGCI_LINT) \
TOOLS = $(ENT) \
$(GOLANGCI_LINT) \
$(GOMAJOR) \
$(GOSEC) \
$(GOTESTSUM) \
Expand All @@ -43,7 +45,10 @@ endef
IGNORED_PACKAGES := \
github.com/artefactual-sdps/preprocessing-sfa/hack/% \
github.com/artefactual-sdps/preprocessing-sfa/internal/%/fake \
github.com/artefactual-sdps/preprocessing-sfa/internal/enums
github.com/artefactual-sdps/preprocessing-sfa/internal/enums \
github.com/artefactual-sdps/preprocessing-sfa/internal/persistence/ent/db \
github.com/artefactual-sdps/preprocessing-sfa/internal/persistence/ent/db/% \
github.com/artefactual-sdps/preprocessing-sfa/internal/persistence/ent/schema

PACKAGES := $(shell go list ./...)
TEST_PACKAGES := $(filter-out $(IGNORED_PACKAGES),$(PACKAGES))
Expand All @@ -59,6 +64,12 @@ deps: # @HELP List available module dependency updates.
deps: $(GOMAJOR)
gomajor list

gen-ent: # @HELP Generate Ent assets.
gen-ent: $(ENT)
ent generate ./internal/persistence/ent/schema \
--feature sql/versioned-migration \
--target=./internal/persistence/ent/db

gen-mock: # @HELP Generate mocks.
gen-mock: $(MOCKGEN)
mockgen -typed -destination=./internal/fformat/fake/mock_identifier.go -package=fake github.com/artefactual-sdps/preprocessing-sfa/internal/fformat Identifier
Expand Down
6 changes: 6 additions & 0 deletions Tiltfile.enduro
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,9 @@ k8s_resource(
trigger_mode=trigger_mode
)
k8s_resource("minio-ais-bucket", labels=["Preprocessing"])
k8s_resource(
"mysql-recreate-prep-database",
labels=["Preprocessing"],
auto_init=False,
trigger_mode=TRIGGER_MODE_MANUAL
)
45 changes: 43 additions & 2 deletions cmd/worker/workercmd/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ package workercmd
import (
"context"
"crypto/rand"
"errors"
"fmt"

"ariga.io/sqlcomment"
"entgo.io/ent/dialect/sql"
bagit_gython "github.com/artefactual-labs/bagit-gython"
"github.com/artefactual-sdps/temporal-activities/bagcreate"
"github.com/artefactual-sdps/temporal-activities/bagvalidate"
Expand All @@ -21,6 +25,8 @@ import (
"github.com/artefactual-sdps/preprocessing-sfa/internal/config"
"github.com/artefactual-sdps/preprocessing-sfa/internal/fformat"
"github.com/artefactual-sdps/preprocessing-sfa/internal/fvalidate"
"github.com/artefactual-sdps/preprocessing-sfa/internal/persistence"
"github.com/artefactual-sdps/preprocessing-sfa/internal/persistence/ent/db"
"github.com/artefactual-sdps/preprocessing-sfa/internal/workflow"
)

Expand All @@ -32,6 +38,7 @@ type Main struct {
temporalWorker temporalsdk_worker.Worker
temporalClient temporalsdk_client.Client
bagValidator *bagit_gython.BagIt
dbClient *db.Client
}

func NewMain(logger logr.Logger, cfg config.Configuration) *Main {
Expand Down Expand Up @@ -137,6 +144,32 @@ func (m *Main) Run(ctx context.Context) error {
temporalsdk_activity.RegisterOptions{Name: bagcreate.Name},
)

if m.cfg.CheckDuplicates {
sqlDB, err := persistence.Open(m.cfg.Persistence.Driver, m.cfg.Persistence.DSN)
if err != nil {
m.logger.Error(err, "Error initializing database pool.")
return err
}
m.dbClient = db.NewClient(
db.Driver(
sqlcomment.NewDriver(
sql.OpenDB(m.cfg.Persistence.Driver, sqlDB),
sqlcomment.WithDriverVerTag(),
sqlcomment.WithTags(sqlcomment.Tags{
sqlcomment.KeyApplication: Name,
}),
),
),
)
if m.cfg.Persistence.Migrate {
err = m.dbClient.Schema.Create(ctx)
if err != nil {
m.logger.Error(err, "Error migrating database.")
return err
}
}
}

if err := w.Start(); err != nil {
m.logger.Error(err, "Preprocessing worker failed to start.")
return err
Expand All @@ -146,6 +179,8 @@ func (m *Main) Run(ctx context.Context) error {
}

func (m *Main) Close() error {
var e error

if m.temporalWorker != nil {
m.temporalWorker.Stop()
}
Expand All @@ -156,9 +191,15 @@ func (m *Main) Close() error {

if m.bagValidator != nil {
if err := m.bagValidator.Cleanup(); err != nil {
m.logger.Info("Couldn't clean up bag validator: %v", err)
e = errors.Join(e, fmt.Errorf("Couldn't clean up bag validator: %v", err))
}
}

return nil
if m.dbClient != nil {
if err := m.dbClient.Close(); err != nil {
e = errors.Join(e, fmt.Errorf("Couldn't close database client: %v", err))
}
}

return e
}
36 changes: 28 additions & 8 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,18 @@ module github.com/artefactual-sdps/preprocessing-sfa
go 1.23.2

require (
ariga.io/sqlcomment v0.1.0
entgo.io/ent v0.14.1
github.com/antchfx/xmlquery v1.4.2
github.com/artefactual-labs/bagit-gython v0.2.0
github.com/artefactual-sdps/temporal-activities v0.0.0-20241105002718-bc4a9d85ce42
github.com/beevik/etree v1.4.0
github.com/deckarep/golang-set/v2 v2.6.0
github.com/go-logr/logr v1.4.2
github.com/go-sql-driver/mysql v1.8.1
github.com/google/uuid v1.6.0
github.com/hashicorp/go-cleanhttp v0.5.2
github.com/mattn/go-sqlite3 v1.14.22
github.com/oklog/run v1.1.0
github.com/richardlehane/siegfried v1.11.1
github.com/spf13/pflag v1.0.5
Expand All @@ -25,7 +29,11 @@ require (
)

require (
ariga.io/atlas v0.19.2 // indirect
filippo.io/edwards25519 v1.1.0 // indirect
github.com/agext/levenshtein v1.2.3 // indirect
github.com/antchfx/xpath v1.3.2 // indirect
github.com/apparentlymart/go-textseg/v15 v15.0.0 // indirect
github.com/aws/aws-sdk-go v1.55.5 // indirect
github.com/aws/aws-sdk-go-v2 v1.30.3 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.3 // indirect
Expand All @@ -49,24 +57,28 @@ require (
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/inflect v0.19.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/mock v1.6.0 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/wire v0.6.0 // indirect
github.com/googleapis/gax-go/v2 v2.13.0 // indirect
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.1 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/hashicorp/hcl/v2 v2.19.1 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
github.com/kluctl/go-embed-python v0.0.0-3.12.3-20240415-1 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/nyudlts/go-bagit v0.3.0-alpha.0.20240515212815-8dab411c23af // indirect
github.com/otiai10/copy v1.14.0 // indirect
github.com/pborman/uuid v1.2.1 // indirect
github.com/pelletier/go-toml/v2 v2.1.0 // indirect
github.com/pelletier/go-toml/v2 v2.1.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/richardlehane/characterize v1.0.0 // indirect
github.com/richardlehane/match v1.0.5 // indirect
Expand All @@ -79,27 +91,35 @@ require (
github.com/ross-spencer/wikiprov v0.2.0 // indirect
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/sergi/go-diff v1.3.1 // indirect
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/spf13/afero v1.11.0 // indirect
github.com/spf13/cast v1.6.0 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/zclconf/go-cty v1.14.1 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect
go.opentelemetry.io/otel v1.29.0 // indirect
go.opentelemetry.io/otel/metric v1.29.0 // indirect
go.opentelemetry.io/otel/trace v1.29.0 // indirect
go.temporal.io/api v1.32.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect
golang.org/x/exp v0.0.0-20231219180239-dc181d75b848 // indirect
golang.org/x/image v0.17.0 // indirect
golang.org/x/net v0.28.0 // indirect
golang.org/x/sync v0.8.0 // indirect
golang.org/x/sys v0.24.0 // indirect
golang.org/x/text v0.17.0 // indirect
golang.org/x/mod v0.20.0 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/sync v0.10.0 // indirect
golang.org/x/sys v0.28.0 // indirect
golang.org/x/text v0.21.0 // indirect
golang.org/x/time v0.6.0 // indirect
golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect
google.golang.org/api v0.191.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240812133136-8ffd90a71988 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240812133136-8ffd90a71988 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd // indirect
google.golang.org/grpc v1.65.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
Expand Down
Loading

0 comments on commit a1e1c51

Please sign in to comment.