Skip to content

Commit

Permalink
Enable packaging v6 DBs (#437)
Browse files Browse the repository at this point in the history
* enable packaging v6 DBs

Signed-off-by: Alex Goodman <[email protected]>

* moderate review comments

Signed-off-by: Alex Goodman <[email protected]>

* port fix from #445

Signed-off-by: Alex Goodman <[email protected]>

---------

Signed-off-by: Alex Goodman <[email protected]>
  • Loading branch information
wagoodman authored Dec 4, 2024
1 parent f303b22 commit 8d377cf
Show file tree
Hide file tree
Showing 11 changed files with 371 additions and 124 deletions.
3 changes: 2 additions & 1 deletion DEVELOPING.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ well as acceptance testing. You will require the following:

- Python 3.8+ installed on your system. Consider using [pyenv](https://github.com/pyenv/pyenv) if you do not have a
preference for managing python interpreter installations.

- `zstd` binary utility if you are packaging v6+ DB schemas
- _(optional)_ `xz` binary utility if you have specifically overridden the package command options

- [Poetry](https://python-poetry.org/) installed for dependency and virtualenv management for python dependencies, to install:

Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ curl -sSfL https://raw.githubusercontent.com/anchore/grype-db/main/install.sh |
curl -sSfL https://raw.githubusercontent.com/anchore/grype-db/main/install.sh | sh -s -- -b <DESTINATION_DIR> <RELEASE_VERSION>
```

> [!IMPORTANT]
> You will require the `zstd` utility installed on your system to support the `package` command.
## Usage

Expand All @@ -39,6 +41,7 @@ grype-db pull [-g] [-p PROVIDER ...]
grype-db build [-g] [--dir=DIR] [--schema=SCHEMA] [--skip-validation] [-p PROVIDER ...]

# Package the already built DB file into an archive ready for upload and serving
# note: you will require the zstd utility to be installed on your system
grype-db package [--dir=DIR] [--publish-base-url=URL]
```

Expand All @@ -54,7 +57,7 @@ is created that is used in packaging and curation of the database file by this a
and a `provider-metadata.json` file is created that includes the last successful run date for each provider.
Use `-g` to generate the list of providers to pull based on the output of "vunnel list".

The `package` command archives the `vulnerability.db`, `metadata.json` and `provider-metadata.json` files into a `tar.gz` file. Additionally, a `listing.json`
The `package` command archives the `vulnerability.db` file into a `tar.zst` file. Additionally, a `latest.json`
is generated to aid in serving one or more database archives for downstream consumption, where the consuming application should
use the listing file to discover the archives available for download. The base URL used to create the download URL for each
database archive is controlled by the `package.base-url` configuration option.
Expand Down
2 changes: 2 additions & 0 deletions cmd/grype-db/application/application.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/anchore/grype-db/internal/log"
"github.com/anchore/grype-db/internal/ui"
"github.com/anchore/grype-db/internal/utils"
"github.com/anchore/grype/grype"
)

const Name = internal.ApplicationName
Expand Down Expand Up @@ -169,6 +170,7 @@ func setupLogger(app *Config) error {
}

log.Set(l)
grype.SetLogger(l)

return nil
}
Expand Down
38 changes: 1 addition & 37 deletions cmd/grype-db/cli/commands/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"errors"
"fmt"
"os"
"time"

"github.com/scylladb/go-set/strset"
"github.com/spf13/cobra"
Expand Down Expand Up @@ -108,7 +107,7 @@ func runBuild(cfg buildConfig) error {
return fmt.Errorf("unable to get provider states: %w", err)
}

earliest, err := earliestTimestamp(states)
earliest, err := provider.States(states).EarliestTimestamp()
if err != nil {
return fmt.Errorf("unable to get earliest timestamp: %w", err)
}
Expand Down Expand Up @@ -152,38 +151,3 @@ func providerStates(skipValidation bool, providers []provider.Provider) ([]provi
}
return states, nil
}

// earliestTimestamp reports the oldest timestamp found across the given provider states.
//
// An error is returned when no states are given, or when no timestamp could be selected
// (for instance when every considered state carries a zero timestamp). When exactly one
// state is given its timestamp is returned unconditionally. When several states are given,
// any state whose provider is "nvd" is left out of the comparison.
func earliestTimestamp(states []provider.State) (time.Time, error) {
	switch len(states) {
	case 0:
		return time.Time{}, fmt.Errorf("cannot find earliest timestamp: no states provided")
	case 1:
		// a lone state is always used as-is (even when it is nvd), otherwise quality gates
		// that pull only nvd would deterministically fail.
		return states[0].Timestamp, nil
	}

	var oldest time.Time
	for i := range states {
		state := states[i]

		// the NVD api is constantly down, so its data age is not allowed to influence the result
		if state.Provider == "nvd" {
			log.WithFields("provider", state.Provider).Debug("not considering data age for provider")
			continue
		}

		// take the first candidate seen, then anything strictly older than the current pick
		if oldest.IsZero() || state.Timestamp.Before(oldest) {
			oldest = state.Timestamp
		}
	}

	if oldest.IsZero() {
		return time.Time{}, fmt.Errorf("unable to determine earliest timestamp")
	}

	log.WithFields("timestamp", oldest).Debug("earliest data timestamp")
	return oldest, nil
}
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,8 @@ require (
github.com/opencontainers/image-spec v1.1.0 // indirect
github.com/opencontainers/runtime-spec v1.1.0 // indirect
github.com/opencontainers/selinux v1.11.0 // indirect
github.com/openvex/go-vex v0.2.5 // indirect
github.com/package-url/packageurl-go v0.1.1 // indirect
github.com/pborman/indent v1.2.1 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -810,6 +810,10 @@ github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bl
github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU=
github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec=
github.com/openvex/go-vex v0.2.5 h1:41utdp2rHgAGCsG+UbjmfMG5CWQxs15nGqir1eRgSrQ=
github.com/openvex/go-vex v0.2.5/go.mod h1:j+oadBxSUELkrKh4NfNb+BPo77U3q7gdKME88IO/0Wo=
github.com/package-url/packageurl-go v0.1.1 h1:KTRE0bK3sKbFKAk3yy63DpeskU7Cvs/x/Da5l+RtzyU=
github.com/package-url/packageurl-go v0.1.1/go.mod h1:uQd4a7Rh3ZsVg5j0lNyAfyxIeGde9yrlhjF78GzeW0c=
github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
github.com/pborman/indent v1.2.1 h1:lFiviAbISHv3Rf0jcuh489bi06hj98JsVMtIDZQb9yM=
Expand Down
91 changes: 82 additions & 9 deletions internal/tarutil/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,17 @@ package tarutil

import (
"archive/tar"
"bufio"
"compress/gzip"
"fmt"
"io"
"os"
"os/exec"
"strings"

"github.com/klauspost/compress/zstd"
"github.com/google/shlex"

"github.com/anchore/grype-db/internal/log"
)

var ErrUnsupportedArchiveSuffix = fmt.Errorf("archive name has an unsupported suffix")
Expand All @@ -20,7 +24,7 @@ type writer struct {
writer *tar.Writer
}

// NewWriter creates a new tar writer that writes to the specified archive path. Supports .tar.gz and .tar.zst file extensions.
// NewWriter creates a new tar writer that writes to the specified archive path. Supports .tar.gz, .tar.zst, .tar.xz, and .tar file extensions.
func NewWriter(archivePath string) (Writer, error) {
w, err := newCompressor(archivePath)
if err != nil {
Expand All @@ -45,19 +49,88 @@ func newCompressor(archivePath string) (io.WriteCloser, error) {
case strings.HasSuffix(archivePath, ".tar.gz"):
return gzip.NewWriter(archive), nil
case strings.HasSuffix(archivePath, ".tar.zst"):
// adding zstd.WithWindowSize(zstd.MaxWindowSize), zstd.WithAllLitEntropyCompression(true)
// will have slightly better results, but use a lot more memory
w, err := zstd.NewWriter(archive, zstd.WithEncoderLevel(zstd.SpeedBestCompression))
if err != nil {
return nil, fmt.Errorf("unable to get zst compression stream: %w", err)
}
return w, nil
// note: since we're using --ultra this tends to have a high memory usage at decompression time
// For ~700 MB payload that is compressing down to ~60 MB, that would need ~130 MB of memory (--ultra -22)
// for the same payload compressing down to ~65MB, that would need ~70MB of memory (--ultra -21)
return newShellCompressor("zstd -T0 -22 --ultra -c -vv", archive)
case strings.HasSuffix(archivePath, ".tar.xz"):
return newShellCompressor("xz -9 --threads=0 -c -vv", archive)
case strings.HasSuffix(archivePath, ".tar"):
return archive, nil
}
return nil, ErrUnsupportedArchiveSuffix
}

// shellCompressor is an io.WriteCloser that streams everything written to it through an
// external compression process. Bytes passed to Write are fed to the process's stdin, the
// process's stdout is connected to the destination archive, and the process's stderr is
// forwarded line-by-line to the debug log. Close must be called to flush the stream and
// reap the process.
type shellCompressor struct {
	// cmd is the started compression process.
	cmd *exec.Cmd
	// pipe is the stdin of the compression process; uncompressed bytes are written here.
	pipe io.WriteCloser
	// stderrDone is closed once the goroutine forwarding the process's stderr has finished reading.
	stderrDone chan struct{}
}

var _ io.WriteCloser = (*shellCompressor)(nil)

// newShellCompressor parses the command line c (shell-style quoting is honored), verifies
// that its binary can be found on PATH, and starts it with stdout attached to archive.
//
// The returned compressor accepts the uncompressed stream via Write. An error is returned
// when the command cannot be parsed, is empty, names a binary that cannot be found, or when
// the process (or its pipes) cannot be set up.
func newShellCompressor(c string, archive io.Writer) (*shellCompressor, error) {
	args, err := shlex.Split(c)
	if err != nil {
		return nil, fmt.Errorf("unable to parse command: %w", err)
	}
	// an empty or whitespace-only command yields no fields; guard before indexing
	if len(args) == 0 {
		return nil, fmt.Errorf("unable to parse command: no binary given in %q", c)
	}
	binary := args[0]

	// look the binary up eagerly so a missing tool is reported with a clear message
	binPath, err := exec.LookPath(binary)
	if err != nil {
		return nil, fmt.Errorf("unable to find binary %q: %w", binary, err)
	}
	if binPath == "" {
		return nil, fmt.Errorf("unable to find binary %q in PATH", binary)
	}

	cmd := exec.Command(binary, args[1:]...)
	log.Debug(strings.Join(cmd.Args, " "))
	cmd.Stdout = archive

	stderrPipe, err := cmd.StderrPipe()
	if err != nil {
		return nil, fmt.Errorf("unable to create stderr pipe: %w", err)
	}

	pipe, err := cmd.StdinPipe()
	if err != nil {
		return nil, fmt.Errorf("unable to create stdin pipe: %w", err)
	}

	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("unable to start process: %w", err)
	}

	stderrDone := make(chan struct{})
	go func() {
		// signal Close that every read from stderr has completed
		defer close(stderrDone)

		scanner := bufio.NewScanner(stderrPipe)
		for scanner.Scan() {
			log.Debugf("[%s] %s", binary, scanner.Text())
		}
		if err := scanner.Err(); err != nil {
			log.Errorf("[%s] error reading stderr: %v", binary, err)
			// the scanner gives up on the first error (e.g. a line longer than its buffer);
			// keep draining so the process can never block writing to a full stderr pipe.
			_, _ = io.Copy(io.Discard, stderrPipe)
		}
	}()

	return &shellCompressor{
		cmd:        cmd,
		pipe:       pipe,
		stderrDone: stderrDone,
	}, nil
}

// Write feeds p to the stdin of the compression process.
func (sc *shellCompressor) Write(p []byte) (int, error) {
	return sc.pipe.Write(p)
}

// Close signals end-of-input to the compression process by closing its stdin, waits for its
// stderr to be fully consumed, and then waits for the process to exit. A non-nil error is
// returned when stdin cannot be closed or when the process exits unsuccessfully.
func (sc *shellCompressor) Close() error {
	if err := sc.pipe.Close(); err != nil {
		// without EOF on stdin the process would not finish on its own; stop it so the waits
		// below cannot hang and the process is reaped instead of being left behind.
		if sc.cmd.Process != nil {
			_ = sc.cmd.Process.Kill() // best effort: the close failure is the error reported
		}
		sc.waitStderr()
		_ = sc.cmd.Wait() // best effort reap: the close failure is the error reported
		return fmt.Errorf("unable to close compression stdin pipe: %w", err)
	}

	// Wait closes the stderr pipe once the process exits, so all reads from that pipe must
	// have completed before Wait is called or trailing output may be lost.
	sc.waitStderr()

	if err := sc.cmd.Wait(); err != nil {
		return fmt.Errorf("compression process error: %w", err)
	}
	return nil
}

// waitStderr blocks until the stderr forwarding goroutine has finished (no-op when none was started).
func (sc *shellCompressor) waitStderr() {
	if sc.stderrDone != nil {
		<-sc.stderrDone
	}
}

// WriteEntry adds the given entry to the archive by handing the underlying tar writer to the
// entry's writeEntry method, returning whatever error that method reports.
func (w *writer) WriteEntry(entry Entry) error {
return entry.writeEntry(w.writer)
}
Expand Down
Loading

0 comments on commit 8d377cf

Please sign in to comment.