Skip to content
This repository has been archived by the owner on May 26, 2022. It is now read-only.

Commit

Permalink
dial: limit error size
Browse files Browse the repository at this point in the history
Instead of storing _every_ error, store at most 32 errors (plus a "too many errors" error).

Helps address #119
  • Loading branch information
Stebalien committed Apr 24, 2019
1 parent 94a49f1 commit b9b8a09
Showing 1 changed file with 24 additions and 2 deletions.
26 changes: 24 additions & 2 deletions swarm_dial.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ var (
// ErrNoTransport is returned when we don't know a transport for the
// given multiaddr.
ErrNoTransport = errors.New("no transport for protocol")

// ErrTooManyErrors is returned as the final error when we encounter too many errors when dialing a peer.
ErrTooManyErrors = errors.New("too many errors")
)

// DialAttempts governs how many times a goroutine will try to dial a given peer.
Expand All @@ -58,6 +61,9 @@ const ConcurrentFdDials = 160
// per peer
const DefaultPerPeerRateLimit = 8

// maxDialErrors is the maximum number of individual dial errors we record
// while dialing a peer. Once that many have been collected, a single
// ErrTooManyErrors is appended and any further errors are dropped.
const maxDialErrors = 32

// dialbackoff is a struct used to avoid over-dialing the same, dead peers.
// Whenever we totally time out on a peer (all three attempts), we add them
// to dialbackoff. Then, whenever goroutines would _wait_ (dialsync), they
Expand Down Expand Up @@ -362,6 +368,22 @@ func (s *Swarm) dialAddrs(ctx context.Context, p peer.ID, remoteAddrs <-chan ma.
respch := make(chan dialResult)
var dialErrors *multierror.Error

// aggregateErr folds err into dialErrors while bounding how many errors
// are retained: the first maxDialErrors errors are stored as-is, the next
// one is replaced by ErrTooManyErrors, and everything after is dropped.
aggregateErr := func(err error) {
	// A nil dialErrors means nothing has been recorded yet, so the
	// error is always kept; only consult Len() once it is non-nil.
	if dialErrors != nil {
		switch recorded := dialErrors.Len(); {
		case recorded > maxDialErrors:
			// The "too many errors" marker is already in place.
			return
		case recorded == maxDialErrors:
			// Cap reached: record the marker instead of this error.
			err = ErrTooManyErrors
		}
	}

	dialErrors = multierror.Append(dialErrors, err)
}

defer s.limiter.clearAllPeerDials(p)

var active int
Expand All @@ -379,7 +401,7 @@ func (s *Swarm) dialAddrs(ctx context.Context, p peer.ID, remoteAddrs <-chan ma.
if resp.Err != nil {
// Errors are normal, lots of dials will fail
log.Infof("got error on dial: %s", resp.Err)
dialErrors = multierror.Append(dialErrors, resp.Err)
aggregateErr(resp.Err)
} else if resp.Conn != nil {
return resp.Conn, nil
}
Expand Down Expand Up @@ -410,7 +432,7 @@ func (s *Swarm) dialAddrs(ctx context.Context, p peer.ID, remoteAddrs <-chan ma.
if resp.Err != nil {
// Errors are normal, lots of dials will fail
log.Infof("got error on dial: %s", resp.Err)
dialErrors = multierror.Append(dialErrors, resp.Err)
aggregateErr(resp.Err)
} else if resp.Conn != nil {
return resp.Conn, nil
}
Expand Down

0 comments on commit b9b8a09

Please sign in to comment.