Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BESU-194] Remove max pivot block resets during fast sync #427

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,14 @@ public class PivotBlockRetriever<C> {

private static final Logger LOG = LogManager.getLogger();
public static final int MAX_QUERY_RETRIES_PER_PEER = 3;
private static final int DEFAULT_MAX_PIVOT_BLOCK_RESETS = 5;
private static final int SUSPICIOUS_NUMBER_OF_PIVOT_BLOCK_RESETS = 5;

private final EthContext ethContext;
private final MetricsSystem metricsSystem;
private final ProtocolSchedule<C> protocolSchedule;

// The number of peers we need to query to confirm our pivot block
private final int peersToQuery;
// The max times to push the pivot block number back when peers can't agree on a pivot
private final int maxPivotBlockResets;
// How far to push back the pivot block when we retry on pivot disagreement
private final long pivotBlockNumberResetDelta;
// The current pivot block number, gets pushed back if peers disagree on the pivot block
Expand All @@ -62,39 +60,20 @@ public class PivotBlockRetriever<C> {

private final AtomicBoolean isStarted = new AtomicBoolean(false);

PivotBlockRetriever(
public PivotBlockRetriever(
final ProtocolSchedule<C> protocolSchedule,
final EthContext ethContext,
final MetricsSystem metricsSystem,
final long pivotBlockNumber,
final int peersToQuery,
final long pivotBlockNumberResetDelta,
final int maxPivotBlockResets) {
final long pivotBlockNumberResetDelta) {
this.protocolSchedule = protocolSchedule;
this.ethContext = ethContext;
this.metricsSystem = metricsSystem;

this.pivotBlockNumber = new AtomicLong(pivotBlockNumber);
this.peersToQuery = peersToQuery;
this.pivotBlockNumberResetDelta = pivotBlockNumberResetDelta;
this.maxPivotBlockResets = maxPivotBlockResets;
}

public PivotBlockRetriever(
final ProtocolSchedule<C> protocolSchedule,
final EthContext ethContext,
final MetricsSystem metricsSystem,
final long pivotBlockNumber,
final int peersToQuery,
final long pivotBlockNumberResetDelta) {
this(
protocolSchedule,
ethContext,
metricsSystem,
pivotBlockNumber,
peersToQuery,
pivotBlockNumberResetDelta,
DEFAULT_MAX_PIVOT_BLOCK_RESETS);
}

public CompletableFuture<FastSyncState> downloadPivotBlockHeader() {
Expand Down Expand Up @@ -147,9 +126,10 @@ private void handleContestedPivotBlock(final long contestedBlockNumber) {
contestedBlockNumber, contestedBlockNumber - pivotBlockNumberResetDelta)) {
LOG.info("Received conflicting pivot blocks for {}.", contestedBlockNumber);

final int retryCount = confirmationTasks.size();
if (retryCount > maxPivotBlockResets
|| pivotBlockNumber.get() <= BlockHeader.GENESIS_BLOCK_NUMBER) {
if (confirmationTasks.size() > SUSPICIOUS_NUMBER_OF_PIVOT_BLOCK_RESETS) {
LOG.warn("Several attempts have been made without finding a pivot block");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd print a warning periodically (every SUSPICIOUS_NUMBER_OF_PIVOT_BLOCK_RESETS retries?) rather than on every individual retry. Also, confirmationTasks can now grow very large, so we'll probably need to rework how we track tasks so we don't hold onto every confirmation task we created.

Alternatively, I wonder if we could alleviate the problem of prematurely switching to full sync by just increasing DEFAULT_MAX_PIVOT_BLOCK_RESETS and making sure we retry fast sync here:

LOG.error("Fast sync failed, switching to full sync.", error);
...

matkt marked this conversation as resolved.
Show resolved Hide resolved
}
if (pivotBlockNumber.get() <= BlockHeader.GENESIS_BLOCK_NUMBER) {
LOG.info("Max retries reached, cancel pivot block download.");
matkt marked this conversation as resolved.
Show resolved Hide resolved
// Pivot block selection has failed
result.completeExceptionally(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ public void setUp() {
ethProtocolManager =
EthProtocolManagerTestUtil.create(
blockchain, blockchainSetupUtil.getWorldArchive(), timeout::get);
pivotBlockRetriever = createPivotBlockRetriever(3, 1, 1);
pivotBlockRetriever = createPivotBlockRetriever(3, 1);
}

private PivotBlockRetriever<Void> createPivotBlockRetriever(
final int peersToQuery, final long pivotBlockDelta, final int maxRetries) {
final int peersToQuery, final long pivotBlockDelta) {
return pivotBlockRetriever =
spy(
new PivotBlockRetriever<>(
Expand All @@ -80,8 +80,7 @@ private PivotBlockRetriever<Void> createPivotBlockRetriever(
metricsSystem,
PIVOT_BLOCK_NUMBER,
peersToQuery,
pivotBlockDelta,
maxRetries));
pivotBlockDelta));
}

@Test
Expand All @@ -107,7 +106,7 @@ public void shouldSucceedWhenAllPeersAgree() {

@Test
public void shouldIgnorePeersThatDoNotHaveThePivotBlock() {
pivotBlockRetriever = createPivotBlockRetriever(3, 1, 1);
pivotBlockRetriever = createPivotBlockRetriever(3, 1);
EthProtocolManagerTestUtil.disableEthSchedulerAutoRun(ethProtocolManager);

final Responder responder =
Expand Down Expand Up @@ -208,7 +207,7 @@ public void shouldIgnorePeersThatAreNotFullyValidated() {

@Test
public void shouldQueryBestPeersFirst() {
pivotBlockRetriever = createPivotBlockRetriever(2, 1, 1);
pivotBlockRetriever = createPivotBlockRetriever(2, 1);
EthProtocolManagerTestUtil.disableEthSchedulerAutoRun(ethProtocolManager);

final Responder responder =
Expand All @@ -235,7 +234,7 @@ public void shouldQueryBestPeersFirst() {

@Test
public void shouldRecoverFromUnresponsivePeer() {
pivotBlockRetriever = createPivotBlockRetriever(2, 1, 1);
pivotBlockRetriever = createPivotBlockRetriever(2, 1);
EthProtocolManagerTestUtil.disableEthSchedulerAutoRun(ethProtocolManager);

final Responder responder =
Expand Down Expand Up @@ -270,7 +269,7 @@ public void shouldRecoverFromUnresponsivePeer() {
@Test
public void shouldRetryWhenPeersDisagreeOnPivot_successfulRetry() {
final long pivotBlockDelta = 1;
pivotBlockRetriever = createPivotBlockRetriever(2, pivotBlockDelta, 1);
pivotBlockRetriever = createPivotBlockRetriever(2, pivotBlockDelta);

final Responder responderA =
RespondingEthPeer.blockchainResponder(blockchain, protocolContext.getWorldStateArchive());
Expand Down Expand Up @@ -300,46 +299,10 @@ public void shouldRetryWhenPeersDisagreeOnPivot_successfulRetry() {
.isCompletedWithValue(new FastSyncState(blockchain.getBlockHeader(newPivotBlock).get()));
}

@Test
public void shouldRetryWhenPeersDisagreeOnPivot_exceedMaxRetries() {
final long pivotBlockDelta = 1;
pivotBlockRetriever = createPivotBlockRetriever(2, pivotBlockDelta, 1);

final Responder responderA =
RespondingEthPeer.blockchainResponder(blockchain, protocolContext.getWorldStateArchive());
final RespondingEthPeer respondingPeerA =
EthProtocolManagerTestUtil.createPeer(ethProtocolManager, 1000);

final Responder responderB =
responderForFakeBlocks(PIVOT_BLOCK_NUMBER, PIVOT_BLOCK_NUMBER - pivotBlockDelta);
final RespondingEthPeer respondingPeerB =
EthProtocolManagerTestUtil.createPeer(ethProtocolManager, 1000);

// Execute task and wait for response
final CompletableFuture<FastSyncState> future = pivotBlockRetriever.downloadPivotBlockHeader();
respondingPeerA.respond(responderA);
respondingPeerB.respond(responderB);

// When disagreement is detected, we should push the pivot block back and retry
final long newPivotBlock = PIVOT_BLOCK_NUMBER - 1;
assertThat(future).isNotCompleted();
assertThat(pivotBlockRetriever.pivotBlockNumber).hasValue(newPivotBlock);

// Another round of invalid responses should lead to failure
respondingPeerA.respond(responderA);
respondingPeerB.respond(responderB);

assertThat(future).isCompletedExceptionally();
assertThatThrownBy(future::get)
.hasRootCauseInstanceOf(FastSyncException.class)
.extracting(e -> ((FastSyncException) ExceptionUtils.rootCause(e)).getError())
.isEqualTo(FastSyncError.PIVOT_BLOCK_HEADER_MISMATCH);
}

@Test
public void shouldRetryWhenPeersDisagreeOnPivot_pivotInvalidOnRetry() {
final long pivotBlockDelta = PIVOT_BLOCK_NUMBER + 1;
pivotBlockRetriever = createPivotBlockRetriever(2, pivotBlockDelta, 1);
pivotBlockRetriever = createPivotBlockRetriever(2, pivotBlockDelta);

final Responder responderA =
RespondingEthPeer.blockchainResponder(blockchain, protocolContext.getWorldStateArchive());
Expand Down