Skip to content

Commit

Permalink
Make bootstrapping handle its own timeouts
Browse files Browse the repository at this point in the history
Currently, an engine registers timeouts into the handler, which schedules the timeouts on behalf of the the engine.
The handler then notifies the engine when the timeout expired.

However, the only engine that uses this mechanism is the bootstrapping engine, and not the other engine types such as the snowman and state sync engines.

It therefore makes sense that the bootstrapper handle its own timeouts instead of delegating them to the handler.
By moving the timeout handling into the bootstrapper, we can make the API of the common.Engine be slimmer by removing the Timeout() method from it.

Signed-off-by: Yacov Manevich <[email protected]>
  • Loading branch information
yacovm committed Sep 24, 2024
1 parent 3cacae5 commit b54e285
Show file tree
Hide file tree
Showing 18 changed files with 175 additions and 142 deletions.
2 changes: 0 additions & 2 deletions chains/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -962,7 +962,6 @@ func (m *manager) createAvalancheChain(
StartupTracker: startupTracker,
Sender: snowmanMessageSender,
BootstrapTracker: sb,
Timer: h,
PeerTracker: peerTracker,
AncestorsMaxContainersReceived: m.BootstrapAncestorsMaxContainersReceived,
DB: blockBootstrappingDB,
Expand Down Expand Up @@ -1358,7 +1357,6 @@ func (m *manager) createSnowmanChain(
StartupTracker: startupTracker,
Sender: messageSender,
BootstrapTracker: sb,
Timer: h,
PeerTracker: peerTracker,
AncestorsMaxContainersReceived: m.BootstrapAncestorsMaxContainersReceived,
DB: bootstrappingDB,
Expand Down
18 changes: 0 additions & 18 deletions message/internal_msg_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import (
var (
disconnected = &Disconnected{}
gossipRequest = &GossipRequest{}
timeout = &Timeout{}

_ fmt.Stringer = (*GetStateSummaryFrontierFailed)(nil)
_ chainIDGetter = (*GetStateSummaryFrontierFailed)(nil)
Expand Down Expand Up @@ -50,8 +49,6 @@ var (
_ fmt.Stringer = (*Disconnected)(nil)

_ fmt.Stringer = (*GossipRequest)(nil)

_ fmt.Stringer = (*Timeout)(nil)
)

type GetStateSummaryFrontierFailed struct {
Expand Down Expand Up @@ -391,18 +388,3 @@ func InternalGossipRequest(
expiration: mockable.MaxTime,
}
}

type Timeout struct{}

func (Timeout) String() string {
return ""
}

func InternalTimeout(nodeID ids.NodeID) InboundMessage {
return &inboundMessage{
nodeID: nodeID,
op: TimeoutOp,
message: timeout,
expiration: mockable.MaxTime,
}
}
4 changes: 1 addition & 3 deletions snow/engine/common/bootstrap_tracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@ type BootstrapTracker interface {
// Returns true iff done bootstrapping
IsBootstrapped() bool

// Bootstrapped marks the named chain as being bootstrapped
// Bootstrapped marks the named chain as being bootstrapped.
Bootstrapped(chainID ids.ID)

OnBootstrapCompleted() chan struct{}
}
3 changes: 0 additions & 3 deletions snow/engine/common/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,9 +415,6 @@ type InternalHandler interface {
// Notify this engine of peer changes.
validators.Connector

// Notify this engine that a registered timeout has fired.
Timeout(context.Context) error

// Gossip to the network a container on the accepted frontier
Gossip(context.Context) error

Expand Down
10 changes: 0 additions & 10 deletions snow/engine/common/timer.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,3 @@
// See the file LICENSE for licensing terms.

package common

import "time"

// Timer describes the standard interface for specifying a timeout
type Timer interface {
// RegisterTimeout specifies how much time to delay the next timeout message
// by. If the subnet has been bootstrapped, the timeout will fire
// immediately.
RegisterTimeout(time.Duration)
}
7 changes: 0 additions & 7 deletions snow/engine/common/traced_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,13 +329,6 @@ func (e *tracedEngine) Disconnected(ctx context.Context, nodeID ids.NodeID) erro
return e.engine.Disconnected(ctx, nodeID)
}

func (e *tracedEngine) Timeout(ctx context.Context) error {
ctx, span := e.tracer.Start(ctx, "tracedEngine.Timeout")
defer span.End()

return e.engine.Timeout(ctx)
}

func (e *tracedEngine) Gossip(ctx context.Context) error {
ctx, span := e.tracer.Start(ctx, "tracedEngine.Gossip")
defer span.End()
Expand Down
14 changes: 0 additions & 14 deletions snow/engine/enginetest/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
)

var (
errTimeout = errors.New("unexpectedly called Timeout")
errGossip = errors.New("unexpectedly called Gossip")
errNotify = errors.New("unexpectedly called Notify")
errGetStateSummaryFrontier = errors.New("unexpectedly called GetStateSummaryFrontier")
Expand Down Expand Up @@ -189,19 +188,6 @@ func (e *Engine) Start(ctx context.Context, startReqID uint32) error {
return errStart
}

func (e *Engine) Timeout(ctx context.Context) error {
if e.TimeoutF != nil {
return e.TimeoutF(ctx)
}
if !e.CantTimeout {
return nil
}
if e.T != nil {
require.FailNow(e.T, errTimeout.Error())
}
return errTimeout
}

func (e *Engine) Gossip(ctx context.Context) error {
if e.GossipF != nil {
return e.GossipF(ctx)
Expand Down
13 changes: 4 additions & 9 deletions snow/engine/enginetest/timer.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,8 @@ import (
"time"

"github.com/stretchr/testify/require"

"github.com/ava-labs/avalanchego/snow/engine/common"
)

var _ common.Timer = (*Timer)(nil)

// Timer is a test timer
type Timer struct {
T *testing.T
Expand All @@ -23,15 +19,14 @@ type Timer struct {
RegisterTimeoutF func(time.Duration)
}

// Default set the default callable value to [cant]
func (t *Timer) Default(cant bool) {
t.CantRegisterTimout = cant
}

func (t *Timer) RegisterTimeout(delay time.Duration) {
if t.RegisterTimeoutF != nil {
t.RegisterTimeoutF(delay)
} else if t.CantRegisterTimout && t.T != nil {
require.FailNow(t.T, "Unexpectedly called RegisterTimeout")
}
}

func (t *Timer) Preempt() {
t.RegisterTimeout(time.Duration(0))
}
19 changes: 15 additions & 4 deletions snow/engine/snowman/bootstrap/bootstrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ type Bootstrapper struct {
Config
shouldHalt func() bool
*metrics

TimeoutRegistry TimeoutRegistry
// list of NoOpsHandler for messages dropped by bootstrapper
common.StateSummaryFrontierHandler
common.AcceptedStateSummaryHandler
Expand Down Expand Up @@ -119,7 +119,7 @@ type Bootstrapper struct {

func New(config Config, onFinished func(ctx context.Context, lastReqID uint32) error) (*Bootstrapper, error) {
metrics, err := newMetrics(config.Ctx.Registerer)
return &Bootstrapper{
bs := &Bootstrapper{
shouldHalt: config.ShouldHalt,
nonVerifyingParser: config.NonVerifyingParse,
Config: config,
Expand All @@ -139,7 +139,15 @@ func New(config Config, onFinished func(ctx context.Context, lastReqID uint32) e

executedStateTransitions: math.MaxInt,
onFinished: onFinished,
}, err
}

bs.TimeoutRegistry = newTimeoutHandler(func() {
if err := bs.Timeout(context.TODO()); err != nil {
bs.Config.Ctx.Log.Warn("Encountered error during bootstrapping: %w", zap.Error(err))
}
})

return bs, err
}

func (b *Bootstrapper) Context() *snow.ConsensusContext {
Expand Down Expand Up @@ -696,14 +704,17 @@ func (b *Bootstrapper) tryStartExecuting(ctx context.Context) error {

// Notify the subnet that this chain is synced
b.Config.BootstrapTracker.Bootstrapped(b.Ctx.ChainID)
if b.Config.BootstrapTracker.IsBootstrapped() {
b.TimeoutRegistry.Preempt()
}

// If the subnet hasn't finished bootstrapping, this chain should remain
// syncing.
if !b.Config.BootstrapTracker.IsBootstrapped() {
log("waiting for the remaining chains in this subnet to finish syncing")
// Restart bootstrapping after [bootstrappingDelay] to keep up to date
// on the latest tip.
b.Config.Timer.RegisterTimeout(bootstrappingDelay)
b.TimeoutRegistry.RegisterTimeout(bootstrappingDelay)
b.awaitingTimeout = true
return nil
}
Expand Down
15 changes: 12 additions & 3 deletions snow/engine/snowman/bootstrap/bootstrapper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ func newConfig(t *testing.T) (Config, ids.NodeID, *enginetest.Sender, *blocktest
PeerTracker: peerTracker,
Sender: sender,
BootstrapTracker: bootstrapTracker,
Timer: &enginetest.Timer{},
AncestorsMaxContainersReceived: 2000,
DB: memdb.New(),
VM: vm,
Expand Down Expand Up @@ -155,7 +154,6 @@ func TestBootstrapperStartsOnlyIfEnoughStakeIsConnected(t *testing.T) {
PeerTracker: peerTracker,
Sender: sender,
BootstrapTracker: &enginetest.BootstrapTracker{},
Timer: &enginetest.Timer{},
AncestorsMaxContainersReceived: 2000,
DB: memdb.New(),
VM: vm,
Expand All @@ -180,6 +178,7 @@ func TestBootstrapperStartsOnlyIfEnoughStakeIsConnected(t *testing.T) {
}
bs, err := New(cfg, dummyCallback)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

vm.CantSetState = false
vm.CantConnected = true
Expand Down Expand Up @@ -236,6 +235,7 @@ func TestBootstrapperSingleFrontier(t *testing.T) {
},
)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

require.NoError(bs.Start(context.Background(), 0))

Expand Down Expand Up @@ -264,6 +264,7 @@ func TestBootstrapperUnknownByzantineResponse(t *testing.T) {
},
)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

require.NoError(bs.Start(context.Background(), 0))

Expand Down Expand Up @@ -309,6 +310,7 @@ func TestBootstrapperPartialFetch(t *testing.T) {
},
)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

require.NoError(bs.Start(context.Background(), 0))

Expand Down Expand Up @@ -359,6 +361,7 @@ func TestBootstrapperEmptyResponse(t *testing.T) {
},
)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

require.NoError(bs.Start(context.Background(), 0))

Expand Down Expand Up @@ -407,6 +410,7 @@ func TestBootstrapperAncestors(t *testing.T) {
},
)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

require.NoError(bs.Start(context.Background(), 0))

Expand Down Expand Up @@ -452,6 +456,7 @@ func TestBootstrapperFinalized(t *testing.T) {
},
)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

require.NoError(bs.Start(context.Background(), 0))

Expand Down Expand Up @@ -494,6 +499,7 @@ func TestRestartBootstrapping(t *testing.T) {
},
)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

require.NoError(bs.Start(context.Background(), 0))

Expand Down Expand Up @@ -558,6 +564,7 @@ func TestBootstrapOldBlockAfterStateSync(t *testing.T) {
},
)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

require.NoError(bs.Start(context.Background(), 0))

Expand Down Expand Up @@ -598,6 +605,7 @@ func TestBootstrapContinueAfterHalt(t *testing.T) {
},
)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

getBlockF := vm.GetBlockF
vm.GetBlockF = func(ctx context.Context, blkID ids.ID) (snowman.Block, error) {
Expand Down Expand Up @@ -690,7 +698,6 @@ func TestBootstrapNoParseOnNew(t *testing.T) {
PeerTracker: peerTracker,
Sender: sender,
BootstrapTracker: bootstrapTracker,
Timer: &enginetest.Timer{},
AncestorsMaxContainersReceived: 2000,
DB: intervalDB,
VM: vm,
Expand Down Expand Up @@ -728,6 +735,7 @@ func TestBootstrapperReceiveStaleAncestorsMessage(t *testing.T) {
},
)
require.NoError(err)
bs.TimeoutRegistry = &enginetest.Timer{}

require.NoError(bs.Start(context.Background(), 0))

Expand Down Expand Up @@ -772,6 +780,7 @@ func TestBootstrapperRollbackOnSetState(t *testing.T) {
return nil
},
)
bs.TimeoutRegistry = &enginetest.Timer{}
require.NoError(err)

vm.SetStateF = func(context.Context, snow.State) error {
Expand Down
1 change: 0 additions & 1 deletion snow/engine/snowman/bootstrap/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ type Config struct {
StartupTracker tracker.Startup
Sender common.Sender
BootstrapTracker common.BootstrapTracker
Timer common.Timer

// PeerTracker manages the set of nodes that we fetch the next block from.
PeerTracker *p2p.PeerTracker
Expand Down
Loading

0 comments on commit b54e285

Please sign in to comment.