Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add jitter for retries #41

Merged
merged 4 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/trivy-java-db/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func init() {

rootCmd.PersistentFlags().StringVar(&cacheDir, "cache-dir", filepath.Join(userCacheDir, "trivy-java-db"),
"cache dir")
rootCmd.PersistentFlags().IntVar(&limit, "limit", 1000, "max parallelism")
rootCmd.PersistentFlags().IntVar(&limit, "limit", 300, "max parallelism")

rootCmd.AddCommand(crawlCmd)
rootCmd.AddCommand(buildCmd)
Expand Down
76 changes: 42 additions & 34 deletions pkg/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"io"
"log/slog"
"math/rand"
"net/http"
"path/filepath"
"strings"
Expand Down Expand Up @@ -48,20 +49,22 @@ func NewCrawler(opt Option) Crawler {
client := retryablehttp.NewClient()
client.RetryMax = 10
client.Logger = slog.Default()
client.RetryWaitMin = 10 * time.Second
client.Backoff = func(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration {
// Maven Central returns "Retry-After: 0" for some reason, resulting in an immediate retry.
if resp.Header.Get("Retry-After") == "0" {
resp.Header.Del("Retry-After")
client.RetryWaitMin = 1 * time.Minute
client.RetryWaitMax = 5 * time.Minute
client.Backoff = retryablehttp.LinearJitterBackoff
client.ResponseLogHook = func(_ retryablehttp.Logger, resp *http.Response) {
if resp.StatusCode != http.StatusOK {
slog.Warn("Unexpected http response", slog.String("url", resp.Request.URL.String()), slog.String("status", resp.Status))
}
return retryablehttp.DefaultBackoff(min, max, attemptNum, resp)
}
client.ErrorHandler = func(resp *http.Response, err error, numTries int) (*http.Response, error) {
if resp.StatusCode != http.StatusOK {
slog.Error("HTTP error", slog.String("url", resp.Request.URL.String()), slog.Int("num_tries", numTries),
slog.Int("status_code", resp.StatusCode))
logger := slog.With(slog.String("url", resp.Request.URL.String()), slog.Int("status_code", resp.StatusCode),
slog.Int("num_tries", numTries))
if err != nil {
logger = logger.With(slog.String("error", err.Error()))
}
return resp, err
logger.Error("HTTP request failed after retries")
return resp, xerrors.Errorf("HTTP request failed after retries: %w", err)
}

if opt.RootUrl == "" {
Expand Down Expand Up @@ -154,13 +157,9 @@ loop:
}

func (c *Crawler) Visit(ctx context.Context, url string) error {
req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil)
resp, err := c.httpGet(ctx, url)
if err != nil {
return xerrors.Errorf("unable to new HTTP request: %w", err)
}
resp, err := c.http.Do(req)
if err != nil {
return xerrors.Errorf("http get error (%s): %w", url, err)
return xerrors.Errorf("http get error: %w", err)
}
defer resp.Body.Close()

Expand Down Expand Up @@ -288,13 +287,9 @@ func (c *Crawler) crawlSHA1(ctx context.Context, baseURL string, meta *Metadata,
}

func (c *Crawler) sha1Urls(ctx context.Context, url string) ([]string, error) {
req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil)
resp, err := c.httpGet(ctx, url)
if err != nil {
return nil, xerrors.Errorf("unable to new HTTP request: %w", err)
}
resp, err := c.http.Do(req)
if err != nil {
return nil, xerrors.Errorf("http get error (%s): %w", url, err)
return nil, xerrors.Errorf("http get error: %w", err)
}
defer func() { _ = resp.Body.Close() }()

Expand Down Expand Up @@ -326,13 +321,9 @@ func (c *Crawler) parseMetadata(ctx context.Context, url string) (*Metadata, err
return nil, nil
}

req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, xerrors.Errorf("unable to new HTTP request: %w", err)
}
resp, err := c.http.Do(req)
resp, err := c.httpGet(ctx, url)
if err != nil {
return nil, xerrors.Errorf("http get error (%s): %w", url, err)
return nil, xerrors.Errorf("http get error: %w", err)
}
defer resp.Body.Close()

Expand Down Expand Up @@ -361,13 +352,9 @@ func (c *Crawler) parseMetadata(ctx context.Context, url string) (*Metadata, err
}

func (c *Crawler) fetchSHA1(ctx context.Context, url string) ([]byte, error) {
req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, xerrors.Errorf("unable to new HTTP request: %w", err)
}
resp, err := c.http.Do(req)
resp, err := c.httpGet(ctx, url)
if err != nil {
return nil, xerrors.Errorf("http get error (%s): %w", url, err)
return nil, xerrors.Errorf("http get error: %w", err)
}
defer func() { _ = resp.Body.Close() }()

Expand Down Expand Up @@ -406,6 +393,27 @@ func (c *Crawler) fetchSHA1(ctx context.Context, url string) ([]byte, error) {
return sha1b, nil
}

// httpGet sends a GET request for url through the crawler's HTTP client,
// after a short random delay that spreads out concurrent requests.
//
// On success the caller owns the returned response and must close its body.
// On failure the returned response is always nil and any body that came back
// with the error has already been closed.
func (c *Crawler) httpGet(ctx context.Context, url string) (*http.Response, error) {
	// Sleep for a while to avoid 429 error
	randomSleep()

	req, err := retryablehttp.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, xerrors.Errorf("unable to create a HTTP request: %w", err)
	}
	resp, err := c.http.Do(req)
	if err != nil {
		// A custom retryablehttp ErrorHandler is allowed to hand back the last
		// response together with the error, and is then responsible for the
		// body. Callers of httpGet never see that response, so close it here
		// to avoid leaking the underlying connection.
		if resp != nil && resp.Body != nil {
			_ = resp.Body.Close()
		}
		return nil, xerrors.Errorf("http error (%s): %w", url, err)
	}
	return resp, nil
}

// randomSleep blocks the calling goroutine for a uniformly random duration in
// the half-open range [0, 100ms). It is used as jitter so that many concurrent
// workers do not hit the remote server at the same instant.
func randomSleep() {
	const maxJitter = 100 * time.Millisecond

	// Draw from the package-level generator instead of building a new one per
	// call: it is safe for concurrent use, avoids an allocation on every
	// request, and cannot hand identical delays to goroutines that happen to
	// read the same clock value (which a per-call time-based seed could do).
	time.Sleep(time.Duration(rand.Int63n(int64(maxJitter))))
}

func versionFromSha1URL(artifactId, sha1URL string) string {
ss := strings.Split(sha1URL, "/")
fileName := ss[len(ss)-1]
Expand Down
Loading