fix: determine F3 participants relative to current network name #12597

Merged · 8 commits · Oct 17, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -12,6 +12,7 @@
 - Fix a bug in the `lotus-shed indexes backfill-events` command that may result in either duplicate events being backfilled where there are existing events (such an operation *should* be idempotent) or events erroneously having duplicate `logIndex` values when queried via ETH APIs. ([filecoin-project/lotus#12567](https://github.com/filecoin-project/lotus/pull/12567))
 - Event APIs (Eth events and actor events) should only return reverted events if the client queries by a specific block hash / tipset. Eth and actor event subscription APIs should always return reverted events to enable accurate observation of real-time changes. ([filecoin-project/lotus#12585](https://github.com/filecoin-project/lotus/pull/12585))
 - Add logic to check if the miner's owner address is delegated (an f4 address). If it is delegated, the `lotus-shed sectors termination-estimate` command now sends the termination state call using the worker ID. This fixes the issue where termination-estimate did not work correctly for miners with delegated owner addresses. ([filecoin-project/lotus#12569](https://github.com/filecoin-project/lotus/pull/12569))
+- Fix a bug in the F3 participation API where valid leases may get removed due to a dynamic manifest update. ([filecoin-project/lotus#12597](https://github.com/filecoin-project/lotus/pull/12597))
 
 ## Improvements
 
2 changes: 1 addition & 1 deletion chain/lf3/f3.go
@@ -155,7 +155,7 @@ func (fff *F3) runSigningLoop(ctx context.Context) {
 			clear(alreadyParticipated)
 		}
 
-		participants := fff.leaser.getParticipantsByInstance(mb.Payload.Instance)
+		participants := fff.leaser.getParticipantsByInstance(mb.NetworkName, mb.Payload.Instance)
 		for _, id := range participants {
 			if _, ok := alreadyParticipated[id]; ok {
 				continue
16 changes: 13 additions & 3 deletions chain/lf3/participation_lease.go
@@ -112,15 +112,25 @@ func (l *leaser) participate(ticket api.F3ParticipationTicket) (api.F3Participat
 	return newLease, nil
 }
 
-func (l *leaser) getParticipantsByInstance(instance uint64) []uint64 {
+func (l *leaser) getParticipantsByInstance(network gpbft.NetworkName, instance uint64) []uint64 {
 	l.mutex.Lock()
 	defer l.mutex.Unlock()
+	currentManifest, _ := l.status()
+	currentNetwork := currentManifest.NetworkName
+	if currentNetwork != network {
+		return nil
+	}
 	var participants []uint64
 	for id, lease := range l.leases {
-		if instance > lease.ToInstance() {
+		if currentNetwork != lease.Network {
+			// Lazily delete any lease that does not belong to network, likely acquired from
+			// prior manifests.
+			delete(l.leases, id)
+			log.Warnf("lost F3 participation lease for miner %d at instance %d due to network mismatch: %s != %s", id, instance, currentNetwork, lease.Network)
+		} else if instance > lease.ToInstance() {
 			// Lazily delete the expired leases.
 			delete(l.leases, id)
-			log.Warnf("lost F3 participation lease for miner %d", id)
+			log.Warnf("lost F3 participation lease for miner %d due to instance (%d) > lease to instance (%d)", id, instance, lease.ToInstance())
 		} else {
 			participants = append(participants, id)
 		}
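The failure mode the changelog describes is easiest to see in isolation: with a dynamic manifest update, the signing loop can still observe message builders from the previous network, whose instance numbers bear no relation to leases issued under the new one, so an instance-only check could discard perfectly valid leases. Below is a minimal, self-contained sketch of the new filtering logic; the types are simplified stand-ins for the real leaser, manifest, and lease, not the lotus API.

package main

import "fmt"

// Simplified stand-ins for the lotus types; the real leaser guards the map
// with a mutex and reads the current network from the latest manifest.
type lease struct {
	network    string
	toInstance uint64
}

type leaser struct {
	currentNetwork string           // network name of the current manifest
	leases         map[uint64]lease // miner ID -> lease
}

// getParticipantsByInstance mirrors the patched logic: refuse lookups for a
// network other than the current one, lazily drop leases issued under prior
// manifests or already expired, and return the miner IDs that remain.
func (l *leaser) getParticipantsByInstance(network string, instance uint64) []uint64 {
	if l.currentNetwork != network {
		return nil
	}
	var participants []uint64
	for id, ls := range l.leases {
		switch {
		case ls.network != l.currentNetwork:
			delete(l.leases, id) // lease acquired under a prior manifest
		case instance > ls.toInstance:
			delete(l.leases, id) // lease expired
		default:
			participants = append(participants, id)
		}
	}
	return participants
}

func main() {
	l := &leaser{
		currentNetwork: "testnet/2",
		leases: map[uint64]lease{
			123: {network: "testnet/1", toInstance: 100}, // stale network
			456: {network: "testnet/2", toInstance: 10},  // expires after instance 10
			789: {network: "testnet/2", toInstance: 100}, // still valid
		},
	}
	// A message builder left over from the previous network now yields no
	// participants instead of expiring the new network's leases.
	fmt.Println(l.getParticipantsByInstance("testnet/1", 100)) // []
	fmt.Println(l.getParticipantsByInstance("testnet/2", 11))  // [789]
}

Note that the lazy deletion keeps the lease map tidy without a separate cleanup pass, at the cost of pruning only when a lookup happens.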
6 changes: 3 additions & 3 deletions chain/lf3/participation_lease_test.go
@@ -42,18 +42,18 @@ func TestLeaser(t *testing.T) {
 		require.NoError(t, err)
 
 		// Both participants should still be valid.
-		participants := subject.getParticipantsByInstance(11)
+		participants := subject.getParticipantsByInstance(testManifest.NetworkName, 11)
 		require.Len(t, participants, 2)
 		require.Contains(t, participants, uint64(123))
 		require.Contains(t, participants, uint64(456))
 
 		// After instance 16, only participant 456 should be valid.
-		participants = subject.getParticipantsByInstance(16)
+		participants = subject.getParticipantsByInstance(testManifest.NetworkName, 16)
 		require.Len(t, participants, 1)
 		require.Contains(t, participants, uint64(456))
 
 		// After instance 17, no participant must have a lease.
-		participants = subject.getParticipantsByInstance(17)
+		participants = subject.getParticipantsByInstance(testManifest.NetworkName, 17)
 		require.Empty(t, participants)
 	})
 	t.Run("expired ticket", func(t *testing.T) {
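With the network parameter in place, a natural companion assertion (not part of this diff; it reuses subject and testManifest from the existing test, and the derived network name is purely illustrative) would exercise the new mismatch path directly:

	// A lookup for any network other than the current manifest's must come
	// back empty, even while the leases are still valid at instance 11.
	participants = subject.getParticipantsByInstance(testManifest.NetworkName+"/other", 11)
	require.Empty(t, participants)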
67 changes: 60 additions & 7 deletions itests/f3_test.go
@@ -2,6 +2,7 @@ package itests
 
 import (
 	"context"
+	"sync"
 	"testing"
 	"time"
 
@@ -36,6 +37,7 @@ type testEnv struct {
 	m       *manifest.Manifest
 	t       *testing.T
 	testCtx context.Context
+	debug   bool
 }
 
 // Test that checks that F3 is enabled successfully,
@@ -194,6 +196,24 @@ func (e *testEnv) waitFor(f func(n *kit.TestFullNode) bool, timeout time.Duratio
 	e.t.Helper()
 	require.Eventually(e.t, func() bool {
 		e.t.Helper()
+		defer func() {
+			if e.debug {
+				var wg sync.WaitGroup
+				printProgress := func(index int, n *kit.TestFullNode) {
+					defer wg.Done()
+					if progress, err := n.F3GetProgress(e.testCtx); err != nil {
+						e.t.Logf("Node #%d progress: err: %v", index, err)
+					} else {
+						e.t.Logf("Node #%d progress: %v", index, progress)
+					}
+				}
+				for i, n := range e.minerFullNodes {
+					wg.Add(1)
+					go printProgress(i, n)
+				}
+				wg.Wait()
+			}
+		}()
 		for _, n := range e.minerFullNodes {
 			if !f(n) {
 				return false
@@ -209,8 +229,42 @@ func (e *testEnv) waitFor(f func(n *kit.TestFullNode) bool, timeout time.Duratio
 // and the second full-node is an observer that is not directly connected to
 // a miner. The last return value is the manifest sender for the network.
 func setup(t *testing.T, blocktime time.Duration) *testEnv {
-	manif := lf3.NewManifest(BaseNetworkName+"/1", DefaultFinality, DefaultBootstrapEpoch, blocktime, cid.Undef)
-	return setupWithStaticManifest(t, manif, false)
+	return setupWithStaticManifest(t, newTestManifest(blocktime), false)
 }
 
+func newTestManifest(blocktime time.Duration) *manifest.Manifest {
+	return &manifest.Manifest{
+		ProtocolVersion:   manifest.VersionCapability,
+		BootstrapEpoch:    DefaultBootstrapEpoch,
+		NetworkName:       BaseNetworkName + "/1",
+		InitialPowerTable: cid.Undef,
+		CommitteeLookback: manifest.DefaultCommitteeLookback,
+		CatchUpAlignment:  blocktime / 2,
+		Gpbft: manifest.GpbftConfig{
+			// Use smaller time intervals for more responsive test progress/assertion.
+			Delta:                      250 * time.Millisecond,
+			DeltaBackOffExponent:       1.3,
+			MaxLookaheadRounds:         5,
+			RebroadcastBackoffBase:     500 * time.Millisecond,
+			RebroadcastBackoffSpread:   0.1,
+			RebroadcastBackoffExponent: 1.3,
+			RebroadcastBackoffMax:      1 * time.Second,
+		},
+		EC: manifest.EcConfig{
+			Period:                   blocktime,
+			Finality:                 DefaultFinality,
+			DelayMultiplier:          manifest.DefaultEcConfig.DelayMultiplier,
+			BaseDecisionBackoffTable: manifest.DefaultEcConfig.BaseDecisionBackoffTable,
+			HeadLookback:             0,
+			Finalize:                 true,
+		},
+		CertificateExchange: manifest.CxConfig{
+			ClientRequestTimeout: manifest.DefaultCxConfig.ClientRequestTimeout,
+			ServerRequestTimeout: manifest.DefaultCxConfig.ServerRequestTimeout,
+			MinimumPollInterval:  blocktime,
+			MaximumPollInterval:  4 * blocktime,
+		},
+	}
+}
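Having the manifest construction in one place also makes per-test variation cheap: a test can start from newTestManifest and override individual fields before handing the result to setupWithStaticManifest. A hypothetical usage (the overrides here are illustrative, not taken from this PR):

	manif := newTestManifest(100 * time.Millisecond)
	manif.EC.Finalize = false // e.g. exercise a non-finalizing EC configuration
	e := setupWithStaticManifest(t, manif, true)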

func setupWithStaticManifest(t *testing.T, manif *manifest.Manifest, testBootstrap bool) *testEnv {
Expand Down Expand Up @@ -262,10 +316,7 @@ func setupWithStaticManifest(t *testing.T, manif *manifest.Manifest, testBootstr
cancel()
}

m, err := n1.F3GetManifest(ctx)
require.NoError(t, err)

e := &testEnv{m: m, t: t, testCtx: ctx}
e := &testEnv{m: manif, t: t, testCtx: ctx}
// in case we want to use more full-nodes in the future
e.minerFullNodes = []*kit.TestFullNode{&n1, &n2, &n3}

@@ -275,14 +326,16 @@ func setupWithStaticManifest(t *testing.T, manif *manifest.Manifest, testBootstr
 		err = n.NetConnect(ctx, e.ms.PeerInfo())
 		require.NoError(t, err)
 	}
 
 	errgrp.Go(func() error {
 		defer func() {
 			require.NoError(t, manifestServerHost.Close())
 		}()
 		return e.ms.Run(ctx)
 	})
 
 	// Assure manifest is picked up by all nodes.
+	e.ms.UpdateManifest(manif)
+	e.waitTillManifestChange(manif, 20*time.Second)
 	return e
 }

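Since the new debug field gates the per-node progress dump in waitFor, a test that stalls can be diagnosed by flipping it on after setup (hypothetical usage; the field is unexported, so this only works from within the itests package):

	e := setup(t, 100*time.Millisecond)
	e.debug = true // waitFor now logs each node's F3 progress on every poll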