From a0043ca8d1d983ee6d6d4e2396b87a20c27371fc Mon Sep 17 00:00:00 2001 From: tnasu Date: Tue, 20 Dec 2022 15:51:33 +0900 Subject: [PATCH 1/6] Backport e2e-test of the latest tendermint main branch --- .github/workflows/coverage.yml | 2 + libs/log/lazy.go | 42 + libs/log/oc_logger_test.go | 6 +- libs/log/ocfmt_logger.go | 8 +- libs/log/ocfmt_logger_test.go | 17 +- test/e2e/Makefile | 7 +- test/e2e/README.md | 10 +- test/e2e/app/app.go | 18 +- test/e2e/app/snapshots.go | 12 +- test/e2e/app/state.go | 16 +- test/e2e/docker/Dockerfile | 1 - test/e2e/docker/entrypoint-maverick | 10 - test/e2e/generator/generate.go | 63 +- test/e2e/generator/generate_test.go | 14 - test/e2e/generator/main.go | 15 +- test/e2e/generator/random.go | 44 +- test/e2e/networks/ci.toml | 31 +- test/e2e/networks/simple.toml | 1 - test/e2e/node/config.go | 12 +- test/e2e/node/main.go | 55 +- test/e2e/pkg/infra/docker/docker.go | 85 + test/e2e/pkg/infra/provider.go | 20 + test/e2e/pkg/infrastructure.go | 80 + test/e2e/pkg/manifest.go | 32 +- test/e2e/pkg/testnet.go | 177 +- test/e2e/runner/cleanup.go | 3 +- test/e2e/runner/exec.go | 5 +- test/e2e/runner/load.go | 3 +- test/e2e/runner/main.go | 74 +- test/e2e/runner/perturb.go | 13 +- test/e2e/runner/setup.go | 122 +- test/e2e/runner/start.go | 5 +- test/e2e/runner/wait.go | 4 +- test/e2e/tests/e2e_test.go | 18 +- test/e2e/tests/evidence_test.go | 23 - test/e2e/tests/validator_test.go | 10 +- test/maverick/README.md | 49 - test/maverick/consensus/misbehavior.go | 401 ----- test/maverick/consensus/msgs.go | 115 -- test/maverick/consensus/reactor.go | 1425 --------------- test/maverick/consensus/replay.go | 551 ------ test/maverick/consensus/replay_file.go | 339 ---- test/maverick/consensus/replay_stubs.go | 92 - test/maverick/consensus/state.go | 1996 ---------------------- test/maverick/consensus/ticker.go | 134 -- test/maverick/consensus/wal.go | 407 ----- test/maverick/consensus/wal_generator.go | 235 --- test/maverick/main.go | 245 --- test/maverick/node/node.go | 1500 ---------------- test/maverick/node/privval.go | 385 ----- 50 files changed, 580 insertions(+), 8352 deletions(-) create mode 100644 libs/log/lazy.go delete mode 100755 test/e2e/docker/entrypoint-maverick delete mode 100644 test/e2e/generator/generate_test.go create mode 100644 test/e2e/pkg/infra/docker/docker.go create mode 100644 test/e2e/pkg/infra/provider.go create mode 100644 test/e2e/pkg/infrastructure.go delete mode 100644 test/maverick/README.md delete mode 100644 test/maverick/consensus/misbehavior.go delete mode 100644 test/maverick/consensus/msgs.go delete mode 100644 test/maverick/consensus/reactor.go delete mode 100644 test/maverick/consensus/replay.go delete mode 100644 test/maverick/consensus/replay_file.go delete mode 100644 test/maverick/consensus/replay_stubs.go delete mode 100644 test/maverick/consensus/state.go delete mode 100644 test/maverick/consensus/ticker.go delete mode 100644 test/maverick/consensus/wal.go delete mode 100644 test/maverick/consensus/wal_generator.go delete mode 100644 test/maverick/main.go delete mode 100644 test/maverick/node/node.go delete mode 100644 test/maverick/node/privval.go diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 38b9b3fbc..d720d6462 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -69,6 +69,7 @@ jobs: **/**.go go.mod go.sum + Makefile - uses: actions/download-artifact@v3 with: name: "${{ github.sha }}-${{ matrix.part }}" @@ -97,6 +98,7 @@ jobs: **/**.go go.mod go.sum + Makefile - uses: actions/download-artifact@v3 with: name: "${{ github.sha }}-00-coverage" diff --git a/libs/log/lazy.go b/libs/log/lazy.go new file mode 100644 index 000000000..e2f9f34fb --- /dev/null +++ b/libs/log/lazy.go @@ -0,0 +1,42 @@ +package log + +import ( + "fmt" + + tmbytes "github.com/line/ostracon/libs/bytes" +) + +type LazySprintf struct { + format string + args []interface{} +} + +// NewLazySprintf defers fmt.Sprintf until the Stringer interface is invoked. +// This is particularly useful for avoiding calling Sprintf when debugging is not +// active. +func NewLazySprintf(format string, args ...interface{}) *LazySprintf { + return &LazySprintf{format, args} +} + +func (l *LazySprintf) String() string { + return fmt.Sprintf(l.format, l.args...) +} + +type LazyBlockHash struct { + block hashable +} + +type hashable interface { + Hash() tmbytes.HexBytes +} + +// NewLazyBlockHash defers block Hash until the Stringer interface is invoked. +// This is particularly useful for avoiding calling Sprintf when debugging is not +// active. +func NewLazyBlockHash(block hashable) *LazyBlockHash { + return &LazyBlockHash{block} +} + +func (l *LazyBlockHash) String() string { + return l.block.Hash().String() +} diff --git a/libs/log/oc_logger_test.go b/libs/log/oc_logger_test.go index 6535c34d1..757f646b2 100644 --- a/libs/log/oc_logger_test.go +++ b/libs/log/oc_logger_test.go @@ -2,7 +2,7 @@ package log_test import ( "bytes" - "io/ioutil" + "io" "strings" "testing" @@ -90,11 +90,11 @@ func TestError(t *testing.T) { } func BenchmarkOCLoggerSimple(b *testing.B) { - benchmarkRunner(b, log.NewOCLogger(ioutil.Discard), baseInfoMessage) + benchmarkRunner(b, log.NewOCLogger(io.Discard), baseInfoMessage) } func BenchmarkOCLoggerContextual(b *testing.B) { - benchmarkRunner(b, log.NewOCLogger(ioutil.Discard), withInfoMessage) + benchmarkRunner(b, log.NewOCLogger(io.Discard), withInfoMessage) } func benchmarkRunner(b *testing.B, logger log.Logger, f func(log.Logger)) { diff --git a/libs/log/ocfmt_logger.go b/libs/log/ocfmt_logger.go index 2cd16648e..33009780c 100644 --- a/libs/log/ocfmt_logger.go +++ b/libs/log/ocfmt_logger.go @@ -65,7 +65,7 @@ func (l ocfmtLogger) Log(keyvals ...interface{}) error { switch keyvals[i] { case kitlevel.Key(): excludeIndexes = append(excludeIndexes, i) - switch keyvals[i+1].(type) { // nolint:gocritic + switch keyvals[i+1].(type) { //nolint:gocritic case string: lvl = keyvals[i+1].(string) case kitlevel.Value: @@ -87,6 +87,12 @@ func (l ocfmtLogger) Log(keyvals ...interface{}) error { if b, ok := keyvals[i+1].([]byte); ok { keyvals[i+1] = strings.ToUpper(hex.EncodeToString(b)) } + + // Realize stringers + if s, ok := keyvals[i+1].(fmt.Stringer); ok { + keyvals[i+1] = s.String() + } + } // Form a custom Ostracon line diff --git a/libs/log/ocfmt_logger_test.go b/libs/log/ocfmt_logger_test.go index c14079afe..b954fe084 100644 --- a/libs/log/ocfmt_logger_test.go +++ b/libs/log/ocfmt_logger_test.go @@ -3,7 +3,7 @@ package log_test import ( "bytes" "errors" - "io/ioutil" + "io" "math" "regexp" "testing" @@ -61,17 +61,17 @@ func TestOCFmtLogger(t *testing.T) { assert.Regexp(t, regexp.MustCompile(`N\[.+\] unknown \s+ hash=74657374206D65\n$`), buf.String()) } -func BenchmarkOCFmtLoggerSimple(b *testing.B) { - benchmarkRunnerKitlog(b, log.NewOCFmtLogger(ioutil.Discard), baseMessage) +func BenchmarkTMFmtLoggerSimple(b *testing.B) { + benchmarkRunnerKitlog(b, log.NewOCFmtLogger(io.Discard), baseMessage) } -func BenchmarkOCFmtLoggerContextual(b *testing.B) { - benchmarkRunnerKitlog(b, log.NewOCFmtLogger(ioutil.Discard), withMessage) +func BenchmarkTMFmtLoggerContextual(b *testing.B) { + benchmarkRunnerKitlog(b, log.NewOCFmtLogger(io.Discard), withMessage) } func TestOCFmtLoggerConcurrency(t *testing.T) { t.Parallel() - testConcurrency(t, log.NewOCFmtLogger(ioutil.Discard), 10000) + testConcurrency(t, log.NewOCFmtLogger(io.Discard), 10000) } func benchmarkRunnerKitlog(b *testing.B, logger kitlog.Logger, f func(kitlog.Logger)) { @@ -83,10 +83,9 @@ func benchmarkRunnerKitlog(b *testing.B, logger kitlog.Logger, f func(kitlog.Log } } -//nolint: errcheck // ignore errors var ( - baseMessage = func(logger kitlog.Logger) { logger.Log("foo_key", "foo_value") } - withMessage = func(logger kitlog.Logger) { kitlog.With(logger, "a", "b").Log("d", "f") } + baseMessage = func(logger kitlog.Logger) { logger.Log("foo_key", "foo_value") } //nolint:errcheck + withMessage = func(logger kitlog.Logger) { kitlog.With(logger, "a", "b").Log("d", "f") } //nolint:errcheck ) // These test are designed to be run with the race detector. diff --git a/test/e2e/Makefile b/test/e2e/Makefile index aafb91839..86ce05f8c 100644 --- a/test/e2e/Makefile +++ b/test/e2e/Makefile @@ -1,7 +1,7 @@ all: docker generator runner docker: - docker build --progress=plain --tag ostracon/e2e-node -f docker/Dockerfile ../.. + docker build --progress=plain --tag ostracon/e2e-node --tag ostracon/e2e-node:local-version -f docker/Dockerfile ../.. # We need to build support for database backends into the app in # order to build a binary with an Ostracon node in it (for built-in @@ -9,11 +9,6 @@ docker: node: go build -o build/node -tags libsodium,badgerdb,boltdb,cleveldb,rocksdb ./node -# To be used primarily by the e2e docker instance. If you want to produce this binary -# elsewhere, then run go build in the maverick directory. -maverick: - go build -o build/maverick -tags libsodium,badgerdb,boltdb,cleveldb,rocksdb ../maverick - generator: go build -o build/generator -tags libsodium ./generator diff --git a/test/e2e/README.md b/test/e2e/README.md index b0d0f52a5..19dcb191b 100644 --- a/test/e2e/README.md +++ b/test/e2e/README.md @@ -20,14 +20,14 @@ Random (but deterministic) combinations of testnets can be generated with `gener ```sh ./build/generator -d networks/generated/ -# Split networks into 4 groups (by filename) -./build/generator -g 4 -d networks/generated/ +# Split networks into 8 groups (by filename) +./build/generator -g 8 -d networks/generated/ ``` Multiple testnets can be run with the `run-multiple.sh` script: ```sh -./run-multiple.sh networks/generated/gen-group0[0123]-*.toml +./run-multiple.sh networks/generated/gen-group3-*.toml ``` ## Test Stages @@ -54,7 +54,7 @@ Auxiliary commands: * `logs`: outputs all node logs. -* `tail`: tails (follows) node logs until cancelled. +* `tail`: tails (follows) node logs until canceled. ## Tests @@ -73,7 +73,7 @@ Optionally, `E2E_NODE` specifies the name of a single testnet node to test. These environment variables can also be specified in `tests/e2e_test.go` to run tests from an editor or IDE: -``` +```go func init() { // This can be used to manually specify a testnet manifest and/or node to // run tests against. The testnet must have been started by the runner first. diff --git a/test/e2e/app/app.go b/test/e2e/app/app.go index d2def07cb..643e21372 100644 --- a/test/e2e/app/app.go +++ b/test/e2e/app/app.go @@ -8,13 +8,13 @@ import ( "os" "path/filepath" "strconv" - - cryptoenc "github.com/line/ostracon/crypto/encoding" - "github.com/line/ostracon/proto/ostracon/crypto" + "time" "github.com/line/ostracon/abci/example/code" abci "github.com/line/ostracon/abci/types" + cryptoenc "github.com/line/ostracon/crypto/encoding" "github.com/line/ostracon/libs/log" + "github.com/line/ostracon/proto/ostracon/crypto" "github.com/line/ostracon/version" ) @@ -73,6 +73,13 @@ type Config struct { // // height <-> pubkey <-> voting power ValidatorUpdates map[string]map[string]uint8 `toml:"validator_update"` + + // Add artificial delays to each of the main ABCI calls to mimic computation time + // of the application + PrepareProposalDelay time.Duration `toml:"prepare_proposal_delay"` + ProcessProposalDelay time.Duration `toml:"process_proposal_delay"` + CheckTxDelay time.Duration `toml:"check_tx_delay"` + // TODO: add vote extension and finalize block delays once completed (@cmwaters) } func DefaultConfig(dir string) *Config { @@ -139,6 +146,11 @@ func (app *Application) CheckTx(req abci.RequestCheckTx) abci.ResponseCheckTx { Log: err.Error(), } } + + if app.cfg.CheckTxDelay != 0 { + time.Sleep(app.cfg.CheckTxDelay) + } + return abci.ResponseCheckTx{Code: code.CodeTypeOK, GasWanted: 1} } diff --git a/test/e2e/app/snapshots.go b/test/e2e/app/snapshots.go index 4ef397866..724d4de5c 100644 --- a/test/e2e/app/snapshots.go +++ b/test/e2e/app/snapshots.go @@ -1,11 +1,9 @@ -// nolint: gosec package app import ( "encoding/json" "errors" "fmt" - "io/ioutil" "math" "os" "path/filepath" @@ -30,7 +28,7 @@ type SnapshotStore struct { // NewSnapshotStore creates a new snapshot store. func NewSnapshotStore(dir string) (*SnapshotStore, error) { store := &SnapshotStore{dir: dir} - if err := os.MkdirAll(dir, 0755); err != nil { + if err := os.MkdirAll(dir, 0o755); err != nil { return nil, err } if err := store.loadMetadata(); err != nil { @@ -45,7 +43,7 @@ func (s *SnapshotStore) loadMetadata() error { file := filepath.Join(s.dir, "metadata.json") metadata := []abci.Snapshot{} - bz, err := ioutil.ReadFile(file) + bz, err := os.ReadFile(file) switch { case errors.Is(err, os.ErrNotExist): case err != nil: @@ -72,7 +70,7 @@ func (s *SnapshotStore) saveMetadata() error { // save the file to a new file and move it to make saving atomic. newFile := filepath.Join(s.dir, "metadata.json.new") file := filepath.Join(s.dir, "metadata.json") - err = ioutil.WriteFile(newFile, bz, 0644) // nolint: gosec + err = os.WriteFile(newFile, bz, 0o644) //nolint: gosec if err != nil { return err } @@ -93,7 +91,7 @@ func (s *SnapshotStore) Create(state *State) (abci.Snapshot, error) { Hash: hashItems(state.Values), Chunks: byteChunks(bz), } - err = ioutil.WriteFile(filepath.Join(s.dir, fmt.Sprintf("%v.json", state.Height)), bz, 0644) + err = os.WriteFile(filepath.Join(s.dir, fmt.Sprintf("%v.json", state.Height)), bz, 0o644) //nolint:gosec if err != nil { return abci.Snapshot{}, err } @@ -122,7 +120,7 @@ func (s *SnapshotStore) LoadChunk(height uint64, format uint32, chunk uint32) ([ defer s.RUnlock() for _, snapshot := range s.metadata { if snapshot.Height == height && snapshot.Format == format { - bz, err := ioutil.ReadFile(filepath.Join(s.dir, fmt.Sprintf("%v.json", height))) + bz, err := os.ReadFile(filepath.Join(s.dir, fmt.Sprintf("%v.json", height))) if err != nil { return nil, err } diff --git a/test/e2e/app/state.go b/test/e2e/app/state.go index 1ede6fb4c..b07a6b9fc 100644 --- a/test/e2e/app/state.go +++ b/test/e2e/app/state.go @@ -1,4 +1,3 @@ -//nolint: gosec package app import ( @@ -6,15 +5,16 @@ import ( "encoding/json" "errors" "fmt" - "io/ioutil" "os" "path/filepath" "sort" "sync" ) -const stateFileName = "app_state.json" -const prevStateFileName = "prev_app_state.json" +const ( + stateFileName = "app_state.json" + prevStateFileName = "prev_app_state.json" +) // State is the application state. type State struct { @@ -52,11 +52,11 @@ func NewState(dir string, persistInterval uint64) (*State, error) { // load loads state from disk. It does not take out a lock, since it is called // during construction. func (s *State) load() error { - bz, err := ioutil.ReadFile(s.currentFile) + bz, err := os.ReadFile(s.currentFile) if err != nil { // if the current state doesn't exist then we try recover from the previous state if errors.Is(err, os.ErrNotExist) { - bz, err = ioutil.ReadFile(s.previousFile) + bz, err = os.ReadFile(s.previousFile) if err != nil { return fmt.Errorf("failed to read both current and previous state (%q): %w", s.previousFile, err) @@ -82,7 +82,7 @@ func (s *State) save() error { // We write the state to a separate file and move it to the destination, to // make it atomic. newFile := fmt.Sprintf("%v.new", s.currentFile) - err = ioutil.WriteFile(newFile, bz, 0644) + err = os.WriteFile(newFile, bz, 0o644) //nolint:gosec if err != nil { return fmt.Errorf("failed to write state to %q: %w", s.currentFile, err) } @@ -160,7 +160,7 @@ func (s *State) Commit() (uint64, []byte, error) { } func (s *State) Rollback() error { - bz, err := ioutil.ReadFile(s.previousFile) + bz, err := os.ReadFile(s.previousFile) if err != nil { return fmt.Errorf("failed to read state from %q: %w", s.previousFile, err) } diff --git a/test/e2e/docker/Dockerfile b/test/e2e/docker/Dockerfile index 102545f82..a1218fe42 100644 --- a/test/e2e/docker/Dockerfile +++ b/test/e2e/docker/Dockerfile @@ -43,7 +43,6 @@ RUN cd ${SRCDIR} && go mod download COPY . ${SRCDIR} COPY test/e2e/docker/entrypoint* /usr/bin/ RUN cd ${SRCDIR} && make build && cp build/ostracon /usr/bin/ostracon -RUN cd ${SRCDIR}/test/e2e && make maverick && cp build/maverick /usr/bin/maverick RUN cd ${SRCDIR}/test/e2e && make node && cp build/node /usr/bin/app # Set up runtime directory. We don't use a separate runtime image since we need diff --git a/test/e2e/docker/entrypoint-maverick b/test/e2e/docker/entrypoint-maverick deleted file mode 100755 index 9d152ebb1..000000000 --- a/test/e2e/docker/entrypoint-maverick +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -# Forcibly remove any stray UNIX sockets left behind from previous runs -rm -rf /var/run/privval.sock /var/run/app.sock - -/usr/bin/app /ostracon/config/app.toml & - -sleep 1 - -/usr/bin/maverick "$@" diff --git a/test/e2e/generator/generate.go b/test/e2e/generator/generate.go index 13dd6984e..502310515 100644 --- a/test/e2e/generator/generate.go +++ b/test/e2e/generator/generate.go @@ -4,8 +4,8 @@ import ( "fmt" "math/rand" "sort" - "strconv" "strings" + "time" e2e "github.com/line/ostracon/test/e2e/pkg" ) @@ -22,7 +22,9 @@ var ( }, "validators": {"genesis", "initchain"}, } - + nodeVersions = weightedChoice{ + "": 2, + } // The following specify randomly chosen values for testnet nodes. nodeDatabases = uniformChoice{"goleveldb", "cleveldb", "rocksdb", "boltdb", "badgerdb"} ipv6 = uniformChoice{false, true} @@ -34,24 +36,25 @@ var ( nodeStateSyncs = uniformChoice{false, true} nodePersistIntervals = uniformChoice{0, 1, 5} nodeSnapshotIntervals = uniformChoice{0, 3} - nodeRetainBlocks = uniformChoice{0, 1, 5} - nodePerturbations = probSetChoice{ + nodeRetainBlocks = uniformChoice{ + 0, + 2 * int(e2e.EvidenceAgeHeight), + 4 * int(e2e.EvidenceAgeHeight), + } + abciDelays = uniformChoice{"none", "small", "large"} + nodePerturbations = probSetChoice{ "disconnect": 0.1, "pause": 0.1, "kill": 0.1, "restart": 0.1, } - nodeMisbehaviors = weightedChoice{ - // FIXME: evidence disabled due to node panicing when not - // having sufficient block history to process evidence. - // https://github.com/tendermint/tendermint/issues/5617 - // misbehaviorOption{"double-prevote"}: 1, - misbehaviorOption{}: 9, - } ) // Generate generates random testnets using the given RNG. -func Generate(r *rand.Rand) ([]e2e.Manifest, error) { +func Generate(r *rand.Rand, multiversion string) ([]e2e.Manifest, error) { + if multiversion != "" { + nodeVersions[multiversion] = 1 + } manifests := []e2e.Manifest{} for _, opt := range combinations(testnetCombinations) { manifest, err := generateTestnet(r, opt) @@ -75,6 +78,17 @@ func generateTestnet(r *rand.Rand, opt map[string]interface{}) (e2e.Manifest, er Nodes: map[string]*e2e.ManifestNode{}, } + switch abciDelays.Choose(r).(string) { + case "none": + case "small": + manifest.PrepareProposalDelay = 100 * time.Millisecond + manifest.ProcessProposalDelay = 100 * time.Millisecond + case "large": + manifest.PrepareProposalDelay = 200 * time.Millisecond + manifest.ProcessProposalDelay = 200 * time.Millisecond + manifest.CheckTxDelay = 20 * time.Millisecond + } + var numSeeds, numValidators, numFulls, numLightClients int switch opt["topology"].(string) { case "single": @@ -205,6 +219,7 @@ func generateNode( r *rand.Rand, mode e2e.Mode, startAt int64, initialHeight int64, forceArchive bool, ) *e2e.ManifestNode { node := e2e.ManifestNode{ + Version: nodeVersions.Choose(r).(string), Mode: string(mode), StartAt: startAt, Database: nodeDatabases.Choose(r).(string), @@ -224,17 +239,6 @@ func generateNode( node.SnapshotInterval = 3 } - if node.Mode == string(e2e.ModeValidator) { - misbehaveAt := startAt + 5 + int64(r.Intn(10)) - if startAt == 0 { - misbehaveAt += initialHeight - 1 - } - node.Misbehaviors = nodeMisbehaviors.Choose(r).(misbehaviorOption).atHeight(misbehaveAt) - if len(node.Misbehaviors) != 0 { - node.PrivvalProtocol = "file" - } - } - // If a node which does not persist state also does not retain blocks, randomly // choose to either persist state or retain all blocks. if node.PersistInterval != nil && *node.PersistInterval == 0 && node.RetainBlocks > 0 { @@ -272,16 +276,3 @@ func generateLightNode(r *rand.Rand, startAt int64, providers []string) *e2e.Man func ptrUint64(i uint64) *uint64 { return &i } - -type misbehaviorOption struct { - misbehavior string -} - -func (m misbehaviorOption) atHeight(height int64) map[string]string { - misbehaviorMap := make(map[string]string) - if m.misbehavior == "" { - return misbehaviorMap - } - misbehaviorMap[strconv.Itoa(int(height))] = m.misbehavior - return misbehaviorMap -} diff --git a/test/e2e/generator/generate_test.go b/test/e2e/generator/generate_test.go deleted file mode 100644 index 22b19f79c..000000000 --- a/test/e2e/generator/generate_test.go +++ /dev/null @@ -1,14 +0,0 @@ -package main - -import ( - "math/rand" - "testing" - - "github.com/stretchr/testify/require" -) - -func TestGenerate(t *testing.T) { - manifests, err := Generate(rand.New(rand.NewSource(randomSeed))) - require.NoError(t, err) - require.NotNil(t, manifests) -} diff --git a/test/e2e/generator/main.go b/test/e2e/generator/main.go index a538d0992..97c6a3152 100644 --- a/test/e2e/generator/main.go +++ b/test/e2e/generator/main.go @@ -1,4 +1,3 @@ -//nolint: gosec package main import ( @@ -45,25 +44,31 @@ func NewCLI() *CLI { if err != nil { return err } - return cli.generate(dir, groups) + multiversion, err := cmd.Flags().GetString("multi-version") + if err != nil { + return err + } + return cli.generate(dir, groups, multiversion) }, } cli.root.PersistentFlags().StringP("dir", "d", "", "Output directory for manifests") _ = cli.root.MarkPersistentFlagRequired("dir") + cli.root.PersistentFlags().StringP("multi-version", "m", "", "Include multi-version testing."+ + "If multi-version is not specified, then only the current Tendermint version will be used in generated testnets.") cli.root.PersistentFlags().IntP("groups", "g", 0, "Number of groups") return cli } // generate generates manifests in a directory. -func (cli *CLI) generate(dir string, groups int) error { - err := os.MkdirAll(dir, 0755) +func (cli *CLI) generate(dir string, groups int, multiversion string) error { + err := os.MkdirAll(dir, 0o755) if err != nil { return err } - manifests, err := Generate(rand.New(rand.NewSource(randomSeed))) + manifests, err := Generate(rand.New(rand.NewSource(randomSeed)), multiversion) //nolint:gosec if err != nil { return err } diff --git a/test/e2e/generator/random.go b/test/e2e/generator/random.go index ec59a01b2..4312eb30d 100644 --- a/test/e2e/generator/random.go +++ b/test/e2e/generator/random.go @@ -56,28 +56,6 @@ func (uc uniformChoice) Choose(r *rand.Rand) interface{} { return uc[r.Intn(len(uc))] } -// weightedChoice chooses a single random key from a map of keys and weights. -type weightedChoice map[interface{}]uint - -func (wc weightedChoice) Choose(r *rand.Rand) interface{} { - total := 0 - choices := make([]interface{}, 0, len(wc)) - for choice, weight := range wc { - total += int(weight) - choices = append(choices, choice) - } - - rem := r.Intn(total) - for _, choice := range choices { - rem -= int(wc[choice]) - if rem <= 0 { - return choice - } - } - - return nil -} - // probSetChoice picks a set of strings based on each string's probability (0-1). type probSetChoice map[string]float64 @@ -105,3 +83,25 @@ func (usc uniformSetChoice) Choose(r *rand.Rand) []string { } return choices } + +// weightedChoice chooses a single random key from a map of keys and weights. +type weightedChoice map[interface{}]uint + +func (wc weightedChoice) Choose(r *rand.Rand) interface{} { + total := 0 + choices := make([]interface{}, 0, len(wc)) + for choice, weight := range wc { + total += int(weight) + choices = append(choices, choice) + } + + rem := r.Intn(total) + for _, choice := range choices { + rem -= int(wc[choice]) + if rem <= 0 { + return choice + } + } + + return nil +} diff --git a/test/e2e/networks/ci.toml b/test/e2e/networks/ci.toml index e1bbc68e8..0301b7893 100644 --- a/test/e2e/networks/ci.toml +++ b/test/e2e/networks/ci.toml @@ -4,6 +4,11 @@ ipv6 = true initial_height = 1000 initial_state = { initial01 = "a", initial02 = "b", initial03 = "c" } +prepare_proposal_delay = "100ms" +process_proposal_delay = "100ms" +check_tx_delay = "0ms" +# The most common case (e.g. Cosmos SDK-based chains). +abci_protocol = "builtin" [validators] validator01 = 100 @@ -27,22 +32,16 @@ validator05 = 50 [node.seed01] mode = "seed" -seeds = ["seed02"] - -[node.seed02] -mode = "seed" -seeds = ["seed01"] +perturb = ["restart"] [node.validator01] seeds = ["seed01"] snapshot_interval = 5 perturb = ["disconnect"] -misbehaviors = { 1018 = "double-prevote" } [node.validator02] -seeds = ["seed02"] +seeds = ["seed01"] database = "boltdb" -abci_protocol = "tcp" privval_protocol = "tcp" persist_interval = 0 perturb = ["restart"] @@ -50,42 +49,36 @@ perturb = ["restart"] [node.validator03] seeds = ["seed01"] database = "badgerdb" -# FIXME: should be grpc, disabled due to https://github.com/tendermint/tendermint/issues/5439 -#abci_protocol = "grpc" privval_protocol = "unix" persist_interval = 3 -retain_blocks = 3 +retain_blocks = 10 perturb = ["kill"] [node.validator04] persistent_peers = ["validator01"] database = "rocksdb" -abci_protocol = "builtin" perturb = ["pause"] [node.validator05] start_at = 1005 # Becomes part of the validator set at 1010 -seeds = ["seed02"] +persistent_peers = ["validator01", "full01"] database = "cleveldb" fast_sync = "v0" -# FIXME: should be grpc, disabled due to https://github.com/tendermint/tendermint/issues/5439 -#abci_protocol = "grpc" +mempool_version = "v1" privval_protocol = "tcp" perturb = ["kill", "pause", "disconnect", "restart"] [node.full01] start_at = 1010 mode = "full" -# FIXME: should be v2, disabled due to flake fast_sync = "v0" persistent_peers = ["validator01", "validator02", "validator03", "validator04", "validator05"] -retain_blocks = 1 +retain_blocks = 10 perturb = ["restart"] [node.full02] start_at = 1015 mode = "full" -# FIXME: should be v2, disabled due to flake fast_sync = "v0" state_sync = true seeds = ["seed01"] @@ -99,4 +92,4 @@ persistent_peers = ["validator01", "validator02", "validator03"] [node.light02] mode= "light" start_at= 1015 -persistent_peers = ["validator04", "full01", "validator05"] \ No newline at end of file +persistent_peers = ["validator04", "full01", "validator05"] diff --git a/test/e2e/networks/simple.toml b/test/e2e/networks/simple.toml index 05cda1819..96b81f79f 100644 --- a/test/e2e/networks/simple.toml +++ b/test/e2e/networks/simple.toml @@ -2,4 +2,3 @@ [node.validator02] [node.validator03] [node.validator04] - diff --git a/test/e2e/node/config.go b/test/e2e/node/config.go index dfa572dcf..f846f9467 100644 --- a/test/e2e/node/config.go +++ b/test/e2e/node/config.go @@ -1,4 +1,3 @@ -//nolint: goconst package main import ( @@ -12,10 +11,10 @@ import ( // Config is the application configuration. type Config struct { - ChainID string `toml:"chain_id"` - Listen string - Protocol string - Dir string + ChainID string `toml:"chain_id"` + Listen string `toml:"listen"` + Protocol string `toml:"protocol"` + Dir string `toml:"dir"` Mode string `toml:"mode"` PersistInterval uint64 `toml:"persist_interval"` SnapshotInterval uint64 `toml:"snapshot_interval"` @@ -24,7 +23,6 @@ type Config struct { PrivValServer string `toml:"privval_server"` PrivValKey string `toml:"privval_key"` PrivValState string `toml:"privval_state"` - Misbehaviors map[string]string `toml:"misbehaviors"` KeyType string `toml:"key_type"` } @@ -56,6 +54,8 @@ func LoadConfig(file string) (*Config, error) { // Validate validates the configuration. We don't do exhaustive config // validation here, instead relying on Testnet.Validate() to handle it. +// +//nolint:goconst func (cfg Config) Validate() error { switch { case cfg.ChainID == "": diff --git a/test/e2e/node/main.go b/test/e2e/node/main.go index 1c5a398fa..98abd0bd2 100644 --- a/test/e2e/node/main.go +++ b/test/e2e/node/main.go @@ -7,7 +7,6 @@ import ( "net/http" "os" "path/filepath" - "strconv" "strings" "time" @@ -29,8 +28,6 @@ import ( rpcserver "github.com/line/ostracon/rpc/jsonrpc/server" "github.com/line/ostracon/test/e2e/app" e2e "github.com/line/ostracon/test/e2e/pkg" - mcs "github.com/line/ostracon/test/maverick/consensus" - maverick "github.com/line/ostracon/test/maverick/node" ) var logger = log.NewOCLogger(log.NewSyncWriter(os.Stdout)) @@ -74,14 +71,10 @@ func run(configFile string) error { case "socket", "grpc": err = startApp(cfg) case "builtin": - if len(cfg.Misbehaviors) == 0 { - if cfg.Mode == string(e2e.ModeLight) { - err = startLightClient(cfg) - } else { - err = startNode(cfg) - } + if cfg.Mode == string(e2e.ModeLight) { + err = startLightClient(cfg) } else { - err = startMaverick(cfg) + err = startNode(cfg) } default: err = fmt.Errorf("invalid protocol %q", cfg.Protocol) @@ -110,7 +103,7 @@ func startApp(cfg *Config) error { if err != nil { return err } - logger.Info(fmt.Sprintf("Server listening on %v (%v protocol)", cfg.Listen, cfg.Protocol)) + logger.Info("start app", "msg", log.NewLazySprintf("Server listening on %v (%v protocol)", cfg.Listen, cfg.Protocol)) return nil } @@ -209,44 +202,6 @@ func startLightClient(cfg *Config) error { return nil } -// FIXME: Temporarily disconnected maverick until it is redesigned -// startMaverick starts a Maverick node that runs the application directly. It assumes the Ostracon -// configuration is in $OCHOME/config/ostracon.toml. -func startMaverick(cfg *Config) error { - app, err := app.NewApplication(cfg.App()) - if err != nil { - return err - } - - tmcfg, logger, nodeKey, err := setupNode() - if err != nil { - return fmt.Errorf("failed to setup config: %w", err) - } - - misbehaviors := make(map[int64]mcs.Misbehavior, len(cfg.Misbehaviors)) - for heightString, misbehaviorString := range cfg.Misbehaviors { - height, _ := strconv.ParseInt(heightString, 10, 64) - misbehaviors[height] = mcs.MisbehaviorList[misbehaviorString] - } - - privKey, _ := maverick.LoadOrGenFilePV(tmcfg.PrivValidatorKeyFile(), tmcfg.PrivValidatorStateFile(), tmcfg.PrivKeyType) - n, err := maverick.NewNode(tmcfg, - privKey, - nodeKey, - proxy.NewLocalClientCreator(app), - maverick.DefaultGenesisDocProviderFunc(tmcfg), - maverick.DefaultDBProvider, - maverick.DefaultMetricsProvider(tmcfg.Instrumentation), - logger, - misbehaviors, - ) - if err != nil { - return err - } - - return n.Start() -} - // startSigner starts a signer server connecting to the given endpoint. func startSigner(cfg *Config) error { filePV := privval.LoadFilePV(cfg.PrivValKey, cfg.PrivValState) @@ -269,7 +224,7 @@ func startSigner(cfg *Config) error { if err != nil { return err } - logger.Info(fmt.Sprintf("Remote signer connecting to %v", cfg.PrivValServer)) + logger.Info("start signer", "msg", log.NewLazySprintf("Remote signer connecting to %v", cfg.PrivValServer)) return nil } diff --git a/test/e2e/pkg/infra/docker/docker.go b/test/e2e/pkg/infra/docker/docker.go new file mode 100644 index 000000000..fbeee432b --- /dev/null +++ b/test/e2e/pkg/infra/docker/docker.go @@ -0,0 +1,85 @@ +package docker + +import ( + "bytes" + "os" + "path/filepath" + "text/template" + + e2e "github.com/line/ostracon/test/e2e/pkg" + "github.com/line/ostracon/test/e2e/pkg/infra" +) + +var _ infra.Provider = &Provider{} + +// Provider implements a docker-compose backed infrastructure provider. +type Provider struct { + Testnet *e2e.Testnet +} + +// Setup generates the docker-compose file and write it to disk, erroring if +// any of these operations fail. +func (p *Provider) Setup() error { + compose, err := dockerComposeBytes(p.Testnet) + if err != nil { + return err + } + //nolint: gosec + // G306: Expect WriteFile permissions to be 0600 or less + err = os.WriteFile(filepath.Join(p.Testnet.Dir, "docker-compose.yml"), compose, 0o644) + if err != nil { + return err + } + return nil +} + +// dockerComposeBytes generates a Docker Compose config file for a testnet and returns the +// file as bytes to be written out to disk. +func dockerComposeBytes(testnet *e2e.Testnet) ([]byte, error) { + // Must use version 2 Docker Compose format, to support IPv6. + tmpl, err := template.New("docker-compose").Parse(`version: '2.4' +networks: + {{ .Name }}: + labels: + e2e: true + driver: bridge +{{- if .IPv6 }} + enable_ipv6: true +{{- end }} + ipam: + driver: default + config: + - subnet: {{ .IP }} + +services: +{{- range .Nodes }} + {{ .Name }}: + labels: + e2e: true + container_name: {{ .Name }} + image: ostracon/e2e-node:{{ .Version }} +{{- if eq .ABCIProtocol "builtin" }} + entrypoint: /usr/bin/entrypoint-builtin +{{- end }} + init: true + ports: + - 26656 + - {{ if .ProxyPort }}{{ .ProxyPort }}:{{ end }}26657 + - 6060 + volumes: + - ./{{ .Name }}:/ostracon + networks: + {{ $.Name }}: + ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .IP }} + +{{end}}`) + if err != nil { + return nil, err + } + var buf bytes.Buffer + err = tmpl.Execute(&buf, testnet) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} diff --git a/test/e2e/pkg/infra/provider.go b/test/e2e/pkg/infra/provider.go new file mode 100644 index 000000000..03b821de3 --- /dev/null +++ b/test/e2e/pkg/infra/provider.go @@ -0,0 +1,20 @@ +package infra + +// Provider defines an API for manipulating the infrastructure of a +// specific set of testnet infrastructure. +type Provider interface { + + // Setup generates any necessary configuration for the infrastructure + // provider during testnet setup. + Setup() error +} + +// NoopProvider implements the provider interface by performing noops for every +// interface method. This may be useful if the infrastructure is managed by a +// separate process. +type NoopProvider struct { +} + +func (NoopProvider) Setup() error { return nil } + +var _ Provider = NoopProvider{} diff --git a/test/e2e/pkg/infrastructure.go b/test/e2e/pkg/infrastructure.go new file mode 100644 index 000000000..2fc0e4bac --- /dev/null +++ b/test/e2e/pkg/infrastructure.go @@ -0,0 +1,80 @@ +package e2e + +import ( + "encoding/json" + "fmt" + "net" + "os" +) + +const ( + dockerIPv4CIDR = "10.186.73.0/24" + dockerIPv6CIDR = "fd80:b10c::/48" + + globalIPv4CIDR = "0.0.0.0/0" +) + +// InfrastructureData contains the relevant information for a set of existing +// infrastructure that is to be used for running a testnet. +type InfrastructureData struct { + + // Provider is the name of infrastructure provider backing the testnet. + // For example, 'docker' if it is running locally in a docker network or + // 'digital-ocean', 'aws', 'google', etc. if it is from a cloud provider. + Provider string `json:"provider"` + + // Instances is a map of all of the machine instances on which to run + // processes for a testnet. + // The key of the map is the name of the instance, which each must correspond + // to the names of one of the testnet nodes defined in the testnet manifest. + Instances map[string]InstanceData `json:"instances"` + + // Network is the CIDR notation range of IP addresses that all of the instances' + // IP addresses are expected to be within. + Network string `json:"network"` +} + +// InstanceData contains the relevant information for a machine instance backing +// one of the nodes in the testnet. +type InstanceData struct { + IPAddress net.IP `json:"ip_address"` +} + +func NewDockerInfrastructureData(m Manifest) (InfrastructureData, error) { + netAddress := dockerIPv4CIDR + if m.IPv6 { + netAddress = dockerIPv6CIDR + } + _, ipNet, err := net.ParseCIDR(netAddress) + if err != nil { + return InfrastructureData{}, fmt.Errorf("invalid IP network address %q: %w", netAddress, err) + } + ipGen := newIPGenerator(ipNet) + ifd := InfrastructureData{ + Provider: "docker", + Instances: make(map[string]InstanceData), + Network: netAddress, + } + for name := range m.Nodes { + ifd.Instances[name] = InstanceData{ + IPAddress: ipGen.Next(), + } + } + return ifd, nil +} + +func InfrastructureDataFromFile(p string) (InfrastructureData, error) { + ifd := InfrastructureData{} + b, err := os.ReadFile(p) + if err != nil { + return InfrastructureData{}, err + } + err = json.Unmarshal(b, &ifd) + if err != nil { + return InfrastructureData{}, err + } + if ifd.Network == "" { + ifd.Network = globalIPv4CIDR + } + return ifd, nil +} diff --git a/test/e2e/pkg/manifest.go b/test/e2e/pkg/manifest.go index 93cbae1ee..6dfb7642b 100644 --- a/test/e2e/pkg/manifest.go +++ b/test/e2e/pkg/manifest.go @@ -3,6 +3,7 @@ package e2e import ( "fmt" "os" + "time" "github.com/BurntSushi/toml" ) @@ -56,6 +57,17 @@ type Manifest struct { // builtin will build a complete Ostracon node into the application and // launch it instead of launching a separate Ostracon process. ABCIProtocol string `toml:"abci_protocol"` + + // Add artificial delays to each of the main ABCI calls to mimic computation time + // of the application + PrepareProposalDelay time.Duration `toml:"prepare_proposal_delay"` + ProcessProposalDelay time.Duration `toml:"process_proposal_delay"` + CheckTxDelay time.Duration `toml:"check_tx_delay"` + // TODO: add vote extension and finalize block delay (@cmwaters) + + LoadTxSizeBytes int `toml:"load_tx_size_bytes"` + LoadTxBatchSize int `toml:"load_tx_batch_size"` + LoadTxConnections int `toml:"load_tx_connections"` } // ManifestNode represents a node in a testnet manifest. @@ -65,6 +77,13 @@ type ManifestNode struct { // is generated), and seed nodes run in seed mode with the PEX reactor enabled. Mode string `toml:"mode"` + // Version specifies which version of Ostracon this node is. Specifying different + // versions for different nodes allows for testing the interaction of different + // node's compatibility. Note that in order to use a node at a particular version, + // there must be a docker image of the test app tagged with this version present + // on the machine where the test is being run. + Version string `toml:"version"` + // Seeds is the list of node names to use as P2P seed nodes. Defaults to none. Seeds []string `toml:"seeds"` @@ -121,15 +140,10 @@ type ManifestNode struct { // restart: restarts the node, shutting it down with SIGTERM Perturb []string `toml:"perturb"` - // Misbehaviors sets how a validator behaves during consensus at a - // certain height. Multiple misbehaviors at different heights can be used - // - // An example of misbehaviors - // { 10 = "double-prevote", 20 = "double-prevote"} - // - // For more information, look at the readme in the maverick folder. - // A list of all behaviors can be found in ../maverick/consensus/behavior.go - Misbehaviors map[string]string `toml:"misbehaviors"` + // SendNoLoad determines if the e2e test should send load to this node. + // It defaults to false so unless the configured, the node will + // receive load. + SendNoLoad bool `toml:"send_no_load"` } // Save saves the testnet manifest to a file. diff --git a/test/e2e/pkg/testnet.go b/test/e2e/pkg/testnet.go index 54ae26900..880cd403e 100644 --- a/test/e2e/pkg/testnet.go +++ b/test/e2e/pkg/testnet.go @@ -1,4 +1,3 @@ -//nolint: gosec package e2e import ( @@ -11,24 +10,28 @@ import ( "sort" "strconv" "strings" + "time" "github.com/line/ostracon/crypto" "github.com/line/ostracon/crypto/ed25519" "github.com/line/ostracon/crypto/secp256k1" rpchttp "github.com/line/ostracon/rpc/client/http" - mcs "github.com/line/ostracon/test/maverick/consensus" ) const ( randomSeed int64 = 2308084734268 proxyPortFirst uint32 = 5701 - networkIPv4 = "10.186.73.0/24" - networkIPv6 = "fd80:b10c::/48" + + defaultBatchSize = 2 + defaultConnections = 1 + defaultTxSizeBytes = 1024 ) -type Mode string -type Protocol string -type Perturbation string +type ( + Mode string + Protocol string + Perturbation string +) const ( ModeValidator Mode = "validator" @@ -46,26 +49,36 @@ const ( PerturbationKill Perturbation = "kill" PerturbationPause Perturbation = "pause" PerturbationRestart Perturbation = "restart" + + EvidenceAgeHeight int64 = 7 + EvidenceAgeTime time.Duration = 500 * time.Millisecond ) // Testnet represents a single testnet. type Testnet struct { - Name string - File string - Dir string - IP *net.IPNet - InitialHeight int64 - InitialState map[string]string - Validators map[*Node]int64 - ValidatorUpdates map[int64]map[*Node]int64 - Nodes []*Node - KeyType string - ABCIProtocol string + Name string + File string + Dir string + IP *net.IPNet + InitialHeight int64 + InitialState map[string]string + Validators map[*Node]int64 + ValidatorUpdates map[int64]map[*Node]int64 + Nodes []*Node + KeyType string + LoadTxSizeBytes int + LoadTxBatchSize int + LoadTxConnections int + ABCIProtocol string + PrepareProposalDelay time.Duration + ProcessProposalDelay time.Duration + CheckTxDelay time.Duration } // Node represents an Ostracon node in a testnet. type Node struct { Name string + Version string Testnet *Testnet Mode Mode PrivvalKey crypto.PrivKey @@ -84,7 +97,9 @@ type Node struct { Seeds []*Node PersistentPeers []*Node Perturbations []Perturbation - Misbehaviors map[int64]string + + // SendNoLoad determines if the e2e test should send load to this node. + SendNoLoad bool } // LoadTestnet loads a testnet from a manifest file, using the filename to @@ -92,38 +107,32 @@ type Node struct { // The testnet generation must be deterministic, since it is generated // separately by the runner and the test cases. For this reason, testnets use a // random seed to generate e.g. keys. -func LoadTestnet(file string) (*Testnet, error) { - manifest, err := LoadManifest(file) - if err != nil { - return nil, err - } - dir := strings.TrimSuffix(file, filepath.Ext(file)) - - // Set up resource generators. These must be deterministic. - netAddress := networkIPv4 - if manifest.IPv6 { - netAddress = networkIPv6 - } - _, ipNet, err := net.ParseCIDR(netAddress) - if err != nil { - return nil, fmt.Errorf("invalid IP network address %q: %w", netAddress, err) - } - - ipGen := newIPGenerator(ipNet) +func LoadTestnet(manifest Manifest, fname string, ifd InfrastructureData) (*Testnet, error) { + dir := strings.TrimSuffix(fname, filepath.Ext(fname)) keyGen := newKeyGenerator(randomSeed) proxyPortGen := newPortGenerator(proxyPortFirst) + _, ipNet, err := net.ParseCIDR(ifd.Network) + if err != nil { + return nil, fmt.Errorf("invalid IP network address %q: %w", ifd.Network, err) + } testnet := &Testnet{ - Name: filepath.Base(dir), - File: file, - Dir: dir, - IP: ipGen.Network(), - InitialHeight: 1, - InitialState: manifest.InitialState, - Validators: map[*Node]int64{}, - ValidatorUpdates: map[int64]map[*Node]int64{}, - Nodes: []*Node{}, - ABCIProtocol: manifest.ABCIProtocol, + Name: filepath.Base(dir), + File: fname, + Dir: dir, + IP: ipNet, + InitialHeight: 1, + InitialState: manifest.InitialState, + Validators: map[*Node]int64{}, + ValidatorUpdates: map[int64]map[*Node]int64{}, + Nodes: []*Node{}, + LoadTxSizeBytes: manifest.LoadTxSizeBytes, + LoadTxBatchSize: manifest.LoadTxBatchSize, + LoadTxConnections: manifest.LoadTxConnections, + ABCIProtocol: manifest.ABCIProtocol, + PrepareProposalDelay: manifest.PrepareProposalDelay, + ProcessProposalDelay: manifest.ProcessProposalDelay, + CheckTxDelay: manifest.CheckTxDelay, } if len(manifest.KeyType) != 0 { testnet.KeyType = manifest.KeyType @@ -134,6 +143,15 @@ func LoadTestnet(file string) (*Testnet, error) { if testnet.ABCIProtocol == "" { testnet.ABCIProtocol = string(ProtocolBuiltin) } + if testnet.LoadTxConnections == 0 { + testnet.LoadTxConnections = defaultConnections + } + if testnet.LoadTxBatchSize == 0 { + testnet.LoadTxBatchSize = defaultBatchSize + } + if testnet.LoadTxSizeBytes == 0 { + testnet.LoadTxSizeBytes = defaultTxSizeBytes + } // Set up nodes, in alphabetical order (IPs and ports get same order). nodeNames := []string{} @@ -144,12 +162,21 @@ func LoadTestnet(file string) (*Testnet, error) { for _, name := range nodeNames { nodeManifest := manifest.Nodes[name] + ind, ok := ifd.Instances[name] + if !ok { + return nil, fmt.Errorf("information for node '%s' missing from infrastucture data", name) + } + v := nodeManifest.Version + if v == "" { + v = "local-version" + } node := &Node{ Name: name, + Version: v, Testnet: testnet, PrivvalKey: keyGen.Generate(manifest.KeyType), NodeKey: keyGen.Generate("ed25519"), - IP: ipGen.Next(), + IP: ind.IPAddress, ProxyPort: proxyPortGen.Next(), Mode: ModeValidator, Database: "goleveldb", @@ -162,7 +189,7 @@ func LoadTestnet(file string) (*Testnet, error) { SnapshotInterval: nodeManifest.SnapshotInterval, RetainBlocks: nodeManifest.RetainBlocks, Perturbations: []Perturbation{}, - Misbehaviors: make(map[int64]string), + SendNoLoad: nodeManifest.SendNoLoad, } if node.StartAt == testnet.InitialHeight { node.StartAt = 0 // normalize to 0 for initial nodes, since code expects this @@ -185,13 +212,6 @@ func LoadTestnet(file string) (*Testnet, error) { for _, p := range nodeManifest.Perturb { node.Perturbations = append(node.Perturbations, Perturbation(p)) } - for heightString, misbehavior := range nodeManifest.Misbehaviors { - height, err := strconv.ParseInt(heightString, 10, 64) - if err != nil { - return nil, fmt.Errorf("unable to parse height %s to int64: %w", heightString, err) - } - node.Misbehaviors[height] = misbehavior - } testnet.Nodes = append(testnet.Nodes, node) } @@ -336,6 +356,10 @@ func (n Node) Validate(testnet Testnet) error { if n.StateSync && n.StartAt == 0 { return errors.New("state synced nodes cannot start at the initial height") } + if n.RetainBlocks != 0 && n.RetainBlocks < uint64(EvidenceAgeHeight) { + return fmt.Errorf("retain_blocks must be greater or equal to max evidence age (%d)", + EvidenceAgeHeight) + } if n.PersistInterval == 0 && n.RetainBlocks > 0 { return errors.New("persist_interval=0 requires retain_blocks=0") } @@ -354,30 +378,6 @@ func (n Node) Validate(testnet Testnet) error { } } - if (n.PrivvalProtocol != "file" || n.Mode != "validator") && len(n.Misbehaviors) != 0 { - return errors.New("must be using \"file\" privval protocol to implement misbehaviors") - } - - for height, misbehavior := range n.Misbehaviors { - if height < n.StartAt { - return fmt.Errorf("misbehavior height %d is below node start height %d", - height, n.StartAt) - } - if height < testnet.InitialHeight { - return fmt.Errorf("misbehavior height %d is below network initial height %d", - height, testnet.InitialHeight) - } - exists := false - for possibleBehaviors := range mcs.MisbehaviorList { - if possibleBehaviors == misbehavior { - exists = true - } - } - if !exists { - return fmt.Errorf("misbehavior %s does not exist", misbehavior) - } - } - return nil } @@ -407,7 +407,7 @@ func (t Testnet) ArchiveNodes() []*Node { // RandomNode returns a random non-seed node. func (t Testnet) RandomNode() *Node { for { - node := t.Nodes[rand.Intn(len(t.Nodes))] + node := t.Nodes[rand.Intn(len(t.Nodes))] //nolint:gosec if node.Mode != ModeSeed { return node } @@ -429,19 +429,6 @@ func (t Testnet) HasPerturbations() bool { return false } -// LastMisbehaviorHeight returns the height of the last misbehavior. -func (t Testnet) LastMisbehaviorHeight() int64 { - lastHeight := int64(0) - for _, node := range t.Nodes { - for height := range node.Misbehaviors { - if height > lastHeight { - lastHeight = height - } - } - } - return lastHeight -} - // Address returns a P2P endpoint address for the node. func (n Node) AddressP2P(withID bool) string { ip := n.IP.String() @@ -483,7 +470,7 @@ type keyGenerator struct { func newKeyGenerator(seed int64) *keyGenerator { return &keyGenerator{ - random: rand.New(rand.NewSource(seed)), + random: rand.New(rand.NewSource(seed)), //nolint:gosec } } diff --git a/test/e2e/runner/cleanup.go b/test/e2e/runner/cleanup.go index 82060aa3f..39a05a4e6 100644 --- a/test/e2e/runner/cleanup.go +++ b/test/e2e/runner/cleanup.go @@ -6,6 +6,7 @@ import ( "os" "path/filepath" + "github.com/line/ostracon/libs/log" e2e "github.com/line/ostracon/test/e2e/pkg" ) @@ -59,7 +60,7 @@ func cleanupDir(dir string) error { return err } - logger.Info(fmt.Sprintf("Removing testnet directory %q", dir)) + logger.Info("cleanup dir", "msg", log.NewLazySprintf("Removing testnet directory %q", dir)) // On Linux, some local files in the volume will be owned by root since Ostracon // runs as root inside the container, so we need to clean them up from within a diff --git a/test/e2e/runner/exec.go b/test/e2e/runner/exec.go index f790f7fc1..e6e47ca0a 100644 --- a/test/e2e/runner/exec.go +++ b/test/e2e/runner/exec.go @@ -1,4 +1,3 @@ -//nolint: gosec package main import ( @@ -10,7 +9,7 @@ import ( // execute executes a shell command. func exec(args ...string) error { - cmd := osexec.Command(args[0], args[1:]...) + cmd := osexec.Command(args[0], args[1:]...) //nolint:gosec out, err := cmd.CombinedOutput() switch err := err.(type) { case nil: @@ -24,7 +23,7 @@ func exec(args ...string) error { // execVerbose executes a shell command while displaying its output. func execVerbose(args ...string) error { - cmd := osexec.Command(args[0], args[1:]...) + cmd := osexec.Command(args[0], args[1:]...) //nolint:gosec cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr return cmd.Run() diff --git a/test/e2e/runner/load.go b/test/e2e/runner/load.go index 71cca5133..9c66d226b 100644 --- a/test/e2e/runner/load.go +++ b/test/e2e/runner/load.go @@ -8,6 +8,7 @@ import ( "math" "time" + "github.com/line/ostracon/libs/log" rpchttp "github.com/line/ostracon/rpc/client/http" e2e "github.com/line/ostracon/test/e2e/pkg" "github.com/line/ostracon/types" @@ -58,7 +59,7 @@ func Load(ctx context.Context, testnet *e2e.Testnet, multiplier int) error { if success == 0 { return errors.New("failed to submit any transactions") } - logger.Info(fmt.Sprintf("Ending transaction load after %v txs (%.1f tx/s)...", + logger.Info("load", "msg", log.NewLazySprintf("Ending transaction load after %v txs (%.1f tx/s)...", success, float64(success)/time.Since(started).Seconds())) return nil } diff --git a/test/e2e/runner/main.go b/test/e2e/runner/main.go index fd72d65a3..5b836d888 100644 --- a/test/e2e/runner/main.go +++ b/test/e2e/runner/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "errors" "fmt" "os" "strconv" @@ -10,11 +11,11 @@ import ( "github.com/line/ostracon/libs/log" e2e "github.com/line/ostracon/test/e2e/pkg" + "github.com/line/ostracon/test/e2e/pkg/infra" + "github.com/line/ostracon/test/e2e/pkg/infra/docker" ) -var ( - logger = log.NewOCLogger(log.NewSyncWriter(os.Stdout)) -) +var logger = log.NewOCLogger(log.NewSyncWriter(os.Stdout)) func main() { NewCLI().Run() @@ -25,6 +26,7 @@ type CLI struct { root *cobra.Command testnet *e2e.Testnet preserve bool + infp infra.Provider } // NewCLI sets up the CLI. @@ -40,19 +42,57 @@ func NewCLI() *CLI { if err != nil { return err } - testnet, err := e2e.LoadTestnet(file) + m, err := e2e.LoadManifest(file) if err != nil { return err } + inft, err := cmd.Flags().GetString("infrastructure-type") + if err != nil { + return err + } + + var ifd e2e.InfrastructureData + switch inft { + case "docker": + var err error + ifd, err = e2e.NewDockerInfrastructureData(m) + if err != nil { + return err + } + case "digital-ocean": + p, err := cmd.Flags().GetString("infrastructure-data") + if err != nil { + return err + } + if p == "" { + return errors.New("'--infrastructure-data' must be set when using the 'digital-ocean' infrastructure-type") + } + ifd, err = e2e.InfrastructureDataFromFile(p) + if err != nil { + return fmt.Errorf("parsing infrastructure data: %s", err) + } + default: + return fmt.Errorf("unknown infrastructure type '%s'", inft) + } + + testnet, err := e2e.LoadTestnet(m, file, ifd) + if err != nil { + return fmt.Errorf("loading testnet: %s", err) + } + cli.testnet = testnet + cli.infp = &infra.NoopProvider{} + if inft == "docker" { + cli.infp = &docker.Provider{Testnet: testnet} + } return nil }, RunE: func(cmd *cobra.Command, args []string) error { if err := Cleanup(cli.testnet); err != nil { return err } - if err := Setup(cli.testnet); err != nil { + if err := Setup(cli.testnet, cli.infp); err != nil { return err } @@ -71,14 +111,6 @@ func NewCLI() *CLI { return err } - if lastMisbehavior := cli.testnet.LastMisbehaviorHeight(); lastMisbehavior > 0 { - // wait for misbehaviors before starting perturbations. We do a separate - // wait for another 5 blocks, since the last misbehavior height may be - // in the past depending on network startup ordering. - if err := WaitUntil(cli.testnet, lastMisbehavior); err != nil { - return err - } - } if err := Wait(cli.testnet, 5); err != nil { // allow some txs to go through return err } @@ -114,6 +146,10 @@ func NewCLI() *CLI { cli.root.PersistentFlags().StringP("file", "f", "", "Testnet TOML manifest") _ = cli.root.MarkPersistentFlagRequired("file") + cli.root.PersistentFlags().StringP("infrastructure-type", "", "docker", "Backing infrastructure used to run the testnet. Either 'digital-ocean' or 'docker'") + + cli.root.PersistentFlags().StringP("infrastructure-data", "", "", "path to the json file containing the infrastructure data. Only used if the 'infrastructure-type' is set to a value other than 'docker'") + cli.root.Flags().BoolVarP(&cli.preserve, "preserve", "p", false, "Preserves the running of the test net after tests are completed") @@ -121,7 +157,7 @@ func NewCLI() *CLI { Use: "setup", Short: "Generates the testnet directory and configuration", RunE: func(cmd *cobra.Command, args []string) error { - return Setup(cli.testnet) + return Setup(cli.testnet, cli.infp) }, }) @@ -131,7 +167,7 @@ func NewCLI() *CLI { RunE: func(cmd *cobra.Command, args []string) error { _, err := os.Stat(cli.testnet.Dir) if os.IsNotExist(err) { - err = Setup(cli.testnet) + err = Setup(cli.testnet, cli.infp) } if err != nil { return err @@ -224,14 +260,14 @@ func NewCLI() *CLI { Min Block Interval Max Block Interval over a 100 block sampling period. - -Does not run any perbutations. + +Does not run any perturbations. `, RunE: func(cmd *cobra.Command, args []string) error { if err := Cleanup(cli.testnet); err != nil { return err } - if err := Setup(cli.testnet); err != nil { + if err := Setup(cli.testnet, cli.infp); err != nil { return err } @@ -241,7 +277,7 @@ Does not run any perbutations. go func() { err := Load(ctx, cli.testnet, 1) if err != nil { - logger.Error(fmt.Sprintf("Transaction load failed: %v", err.Error())) + logger.Error(fmt.Sprintf("Transaction load errored: %v", err.Error())) } chLoadResult <- err }() diff --git a/test/e2e/runner/perturb.go b/test/e2e/runner/perturb.go index eddd68da5..fa6756acf 100644 --- a/test/e2e/runner/perturb.go +++ b/test/e2e/runner/perturb.go @@ -4,6 +4,7 @@ import ( "fmt" "time" + "github.com/line/ostracon/libs/log" rpctypes "github.com/line/ostracon/rpc/core/types" e2e "github.com/line/ostracon/test/e2e/pkg" ) @@ -28,7 +29,7 @@ func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.Resul testnet := node.Testnet switch perturbation { case e2e.PerturbationDisconnect: - logger.Info(fmt.Sprintf("Disconnecting node %v...", node.Name)) + logger.Info("perturb node", "msg", log.NewLazySprintf("Disconnecting node %v...", node.Name)) if err := execDocker("network", "disconnect", testnet.Name+"_"+testnet.Name, node.Name); err != nil { return nil, err } @@ -38,7 +39,7 @@ func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.Resul } case e2e.PerturbationKill: - logger.Info(fmt.Sprintf("Killing node %v...", node.Name)) + logger.Info("perturb node", "msg", log.NewLazySprintf("Killing node %v...", node.Name)) if err := execCompose(testnet.Dir, "kill", "-s", "SIGKILL", node.Name); err != nil { return nil, err } @@ -47,7 +48,7 @@ func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.Resul } case e2e.PerturbationPause: - logger.Info(fmt.Sprintf("Pausing node %v...", node.Name)) + logger.Info("perturb node", "msg", log.NewLazySprintf("Pausing node %v...", node.Name)) if err := execCompose(testnet.Dir, "pause", node.Name); err != nil { return nil, err } @@ -57,7 +58,7 @@ func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.Resul } case e2e.PerturbationRestart: - logger.Info(fmt.Sprintf("Restarting node %v...", node.Name)) + logger.Info("perturb node", "msg", log.NewLazySprintf("Restarting node %v...", node.Name)) if err := execCompose(testnet.Dir, "restart", node.Name); err != nil { return nil, err } @@ -70,6 +71,8 @@ func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.Resul if err != nil { return nil, err } - logger.Info(fmt.Sprintf("Node %v recovered at height %v", node.Name, status.SyncInfo.LatestBlockHeight)) + logger.Info("perturb node", + "msg", + log.NewLazySprintf("Node %v recovered at height %v", node.Name, status.SyncInfo.LatestBlockHeight)) return status, nil } diff --git a/test/e2e/runner/setup.go b/test/e2e/runner/setup.go index 739aafc26..72a58c347 100644 --- a/test/e2e/runner/setup.go +++ b/test/e2e/runner/setup.go @@ -1,4 +1,3 @@ -// nolint: gosec package main import ( @@ -7,14 +6,11 @@ import ( "encoding/json" "errors" "fmt" - "io/ioutil" "os" "path/filepath" "regexp" "sort" - "strconv" "strings" - "text/template" "time" "github.com/BurntSushi/toml" @@ -22,9 +18,11 @@ import ( "github.com/line/ostracon/config" "github.com/line/ostracon/crypto/ed25519" cryptoenc "github.com/line/ostracon/crypto/encoding" + "github.com/line/ostracon/libs/log" "github.com/line/ostracon/p2p" "github.com/line/ostracon/privval" e2e "github.com/line/ostracon/test/e2e/pkg" + "github.com/line/ostracon/test/e2e/pkg/infra" "github.com/line/ostracon/types" ) @@ -41,19 +39,15 @@ const ( ) // Setup sets up the testnet configuration. -func Setup(testnet *e2e.Testnet) error { - logger.Info(fmt.Sprintf("Generating testnet files in %q", testnet.Dir)) +func Setup(testnet *e2e.Testnet, infp infra.Provider) error { + logger.Info("setup", "msg", log.NewLazySprintf("Generating testnet files in %q", testnet.Dir)) err := os.MkdirAll(testnet.Dir, os.ModePerm) if err != nil { return err } - compose, err := MakeDockerCompose(testnet) - if err != nil { - return err - } - err = ioutil.WriteFile(filepath.Join(testnet.Dir, "docker-compose.yml"), compose, 0644) + err = infp.Setup() if err != nil { return err } @@ -76,7 +70,7 @@ func Setup(testnet *e2e.Testnet) error { if node.Mode == e2e.ModeLight && strings.Contains(dir, "app") { continue } - err := os.MkdirAll(dir, 0755) + err := os.MkdirAll(dir, 0o755) if err != nil { return err } @@ -92,7 +86,7 @@ func Setup(testnet *e2e.Testnet) error { if err != nil { return err } - err = ioutil.WriteFile(filepath.Join(nodeDir, "config", "app.toml"), appCfg, 0644) + err = os.WriteFile(filepath.Join(nodeDir, "config", "app.toml"), appCfg, 0o644) //nolint:gosec if err != nil { return err } @@ -128,73 +122,6 @@ func Setup(testnet *e2e.Testnet) error { return nil } -// MakeDockerCompose generates a Docker Compose config for a testnet. -func MakeDockerCompose(testnet *e2e.Testnet) ([]byte, error) { - // Must use version 2 Docker Compose format, to support IPv6. - tmpl, err := template.New("docker-compose").Funcs(template.FuncMap{ - "misbehaviorsToString": func(misbehaviors map[int64]string) string { - str := "" - for height, misbehavior := range misbehaviors { - // after the first behavior set, a comma must be prepended - if str != "" { - str += "," - } - heightString := strconv.Itoa(int(height)) - str += misbehavior + "," + heightString - } - return str - }, - }).Parse(`version: '2.4' - -networks: - {{ .Name }}: - labels: - e2e: true - driver: bridge -{{- if .IPv6 }} - enable_ipv6: true -{{- end }} - ipam: - driver: default - config: - - subnet: {{ .IP }} - -services: -{{- range .Nodes }} - {{ .Name }}: - labels: - e2e: true - container_name: {{ .Name }} - image: ostracon/e2e-node -{{- if eq .ABCIProtocol "builtin" }} - entrypoint: /usr/bin/entrypoint-builtin -{{- else if .Misbehaviors }} - entrypoint: /usr/bin/entrypoint-maverick - command: ["node", "--misbehaviors", "{{ misbehaviorsToString .Misbehaviors }}"] -{{- end }} - init: true - ports: - - 26656 - - {{ if .ProxyPort }}{{ .ProxyPort }}:{{ end }}26657 - - 6060 - volumes: - - ./{{ .Name }}:/ostracon - networks: - {{ $.Name }}: - ipv{{ if $.IPv6 }}6{{ else }}4{{ end}}_address: {{ .IP }} - -{{end}}`) - if err != nil { - return nil, err - } - var buf bytes.Buffer - err = tmpl.Execute(&buf, testnet) - if err != nil { - return nil, err - } - return buf.Bytes(), nil -} - // MakeGenesis generates a genesis document. func MakeGenesis(testnet *e2e.Testnet) (types.GenesisDoc, error) { genesis := types.GenesisDoc{ @@ -323,16 +250,19 @@ func MakeConfig(node *e2e.Node) (*config.Config, error) { // MakeAppConfig generates an ABCI application config for a node. func MakeAppConfig(node *e2e.Node) ([]byte, error) { cfg := map[string]interface{}{ - "chain_id": node.Testnet.Name, - "dir": "data/app", - "listen": AppAddressUNIX, - "mode": node.Mode, - "proxy_port": node.ProxyPort, - "protocol": "socket", - "persist_interval": node.PersistInterval, - "snapshot_interval": node.SnapshotInterval, - "retain_blocks": node.RetainBlocks, - "key_type": node.PrivvalKey.Type(), + "chain_id": node.Testnet.Name, + "dir": "data/app", + "listen": AppAddressUNIX, + "mode": node.Mode, + "proxy_port": node.ProxyPort, + "protocol": "socket", + "persist_interval": node.PersistInterval, + "snapshot_interval": node.SnapshotInterval, + "retain_blocks": node.RetainBlocks, + "key_type": node.PrivvalKey.Type(), + "prepare_proposal_delay": node.Testnet.PrepareProposalDelay, + "process_proposal_delay": node.Testnet.ProcessProposalDelay, + "check_tx_delay": node.Testnet.CheckTxDelay, } switch node.ABCIProtocol { case e2e.ProtocolUNIX: @@ -344,7 +274,7 @@ func MakeAppConfig(node *e2e.Node) ([]byte, error) { cfg["protocol"] = "grpc" case e2e.ProtocolBuiltin: delete(cfg, "listen") - cfg["protocol"] = "builtin" + cfg["protocol"] = string(node.ABCIProtocol) default: return nil, fmt.Errorf("unexpected ABCI protocol setting %q", node.ABCIProtocol) } @@ -364,12 +294,6 @@ func MakeAppConfig(node *e2e.Node) ([]byte, error) { } } - misbehaviors := make(map[string]string) - for height, misbehavior := range node.Misbehaviors { - misbehaviors[strconv.Itoa(int(height))] = misbehavior - } - cfg["misbehaviors"] = misbehaviors - if len(node.Testnet.ValidatorUpdates) > 0 { validatorUpdates := map[string]map[string]int64{} for height, validators := range node.Testnet.ValidatorUpdates { @@ -405,11 +329,11 @@ func UpdateConfigStateSync(node *e2e.Node, height int64, hash []byte) error { // FIXME Apparently there's no function to simply load a config file without // involving the entire Viper apparatus, so we'll just resort to regexps. - bz, err := ioutil.ReadFile(cfgPath) + bz, err := os.ReadFile(cfgPath) if err != nil { return err } bz = regexp.MustCompile(`(?m)^trust_height =.*`).ReplaceAll(bz, []byte(fmt.Sprintf(`trust_height = %v`, height))) bz = regexp.MustCompile(`(?m)^trust_hash =.*`).ReplaceAll(bz, []byte(fmt.Sprintf(`trust_hash = "%X"`, hash))) - return ioutil.WriteFile(cfgPath, bz, 0644) + return os.WriteFile(cfgPath, bz, 0o644) //nolint:gosec } diff --git a/test/e2e/runner/start.go b/test/e2e/runner/start.go index c8f26e211..bd18d5694 100644 --- a/test/e2e/runner/start.go +++ b/test/e2e/runner/start.go @@ -5,6 +5,7 @@ import ( "sort" "time" + "github.com/line/ostracon/libs/log" e2e "github.com/line/ostracon/test/e2e/pkg" ) @@ -48,7 +49,7 @@ func Start(testnet *e2e.Testnet) error { if _, err := waitForNode(node, 0, 15*time.Second); err != nil { return err } - logger.Info(fmt.Sprintf("Node %v up on http://127.0.0.1:%v", node.Name, node.ProxyPort)) + logger.Info("start", "msg", log.NewLazySprintf("Node %v up on http://127.0.0.1:%v", node.Name, node.ProxyPort)) } networkHeight := testnet.InitialHeight @@ -103,7 +104,7 @@ func Start(testnet *e2e.Testnet) error { if err != nil { return err } - logger.Info(fmt.Sprintf("Node %v up on http://127.0.0.1:%v at height %v", + logger.Info("start", "msg", log.NewLazySprintf("Node %v up on http://127.0.0.1:%v at height %v", node.Name, node.ProxyPort, status.SyncInfo.LatestBlockHeight)) } diff --git a/test/e2e/runner/wait.go b/test/e2e/runner/wait.go index d49b0e723..563b1c5dd 100644 --- a/test/e2e/runner/wait.go +++ b/test/e2e/runner/wait.go @@ -1,9 +1,9 @@ package main import ( - "fmt" "time" + "github.com/line/ostracon/libs/log" e2e "github.com/line/ostracon/test/e2e/pkg" ) @@ -19,7 +19,7 @@ func Wait(testnet *e2e.Testnet, blocks int64) error { // WaitUntil waits until a given height has been reached. func WaitUntil(testnet *e2e.Testnet, height int64) error { - logger.Info(fmt.Sprintf("Waiting for all nodes to reach height %v...", height)) + logger.Info("wait until", "msg", log.NewLazySprintf("Waiting for all nodes to reach height %v...", height)) _, err := waitForAllNodes(testnet, height, waitingTime(len(testnet.Nodes))) if err != nil { return err diff --git a/test/e2e/tests/e2e_test.go b/test/e2e/tests/e2e_test.go index 1fdd5a603..e1b0427ba 100644 --- a/test/e2e/tests/e2e_test.go +++ b/test/e2e/tests/e2e_test.go @@ -66,23 +66,27 @@ func testNode(t *testing.T, testFunc func(*testing.T, e2e.Node)) { func loadTestnet(t *testing.T) e2e.Testnet { t.Helper() - manifest := os.Getenv("E2E_MANIFEST") - if manifest == "" { + manifestFile := os.Getenv("E2E_MANIFEST") + if manifestFile == "" { t.Skip("E2E_MANIFEST not set, not an end-to-end test run") } - if !filepath.IsAbs(manifest) { - manifest = filepath.Join("..", manifest) + if !filepath.IsAbs(manifestFile) { + manifestFile = filepath.Join("..", manifestFile) } testnetCacheMtx.Lock() defer testnetCacheMtx.Unlock() - if testnet, ok := testnetCache[manifest]; ok { + if testnet, ok := testnetCache[manifestFile]; ok { return testnet } + m, err := e2e.LoadManifest(manifestFile) + require.NoError(t, err) + ifd, err := e2e.NewDockerInfrastructureData(m) + require.NoError(t, err) - testnet, err := e2e.LoadTestnet(manifest) + testnet, err := e2e.LoadTestnet(m, manifestFile, ifd) require.NoError(t, err) - testnetCache[manifest] = *testnet + testnetCache[manifestFile] = *testnet return *testnet } diff --git a/test/e2e/tests/evidence_test.go b/test/e2e/tests/evidence_test.go index a35a0c78f..16be9d23b 100644 --- a/test/e2e/tests/evidence_test.go +++ b/test/e2e/tests/evidence_test.go @@ -4,8 +4,6 @@ import ( "bytes" "testing" - "github.com/stretchr/testify/require" - e2e "github.com/line/ostracon/test/e2e/pkg" "github.com/line/ostracon/types" ) @@ -15,7 +13,6 @@ import ( func TestEvidence_Misbehavior(t *testing.T) { blocks := fetchBlockChain(t) testNode(t, func(t *testing.T, node e2e.Node) { - seenEvidence := make(map[int64]struct{}) for _, block := range blocks { // Find any evidence blaming this node in this block var nodeEvidence types.Evidence @@ -32,26 +29,6 @@ func TestEvidence_Misbehavior(t *testing.T) { if nodeEvidence == nil { continue // no evidence for the node at this height } - - // Check that evidence was as expected - misbehavior, ok := node.Misbehaviors[nodeEvidence.Height()] - require.True(t, ok, "found unexpected evidence %v in height %v", - nodeEvidence, block.Height) - - switch misbehavior { - case "double-prevote": - require.IsType(t, &types.DuplicateVoteEvidence{}, nodeEvidence, "unexpected evidence type") - default: - t.Fatalf("unknown misbehavior %v", misbehavior) - } - - seenEvidence[nodeEvidence.Height()] = struct{}{} - } - // see if there is any evidence that we were expecting but didn't see - for height, misbehavior := range node.Misbehaviors { - _, ok := seenEvidence[height] - require.True(t, ok, "expected evidence for %v misbehavior at height %v by node but was never found", - misbehavior, height) } }) } diff --git a/test/e2e/tests/validator_test.go b/test/e2e/tests/validator_test.go index febbbe44a..47889f27e 100644 --- a/test/e2e/tests/validator_test.go +++ b/test/e2e/tests/validator_test.go @@ -4,10 +4,9 @@ import ( "bytes" "testing" - "github.com/line/ostracon/crypto/vrf" - "github.com/stretchr/testify/require" + "github.com/line/ostracon/crypto/vrf" e2e "github.com/line/ostracon/test/e2e/pkg" "github.com/line/ostracon/types" ) @@ -58,14 +57,13 @@ func TestValidator_Sets(t *testing.T) { require.Equal(t, expected.VotingPower, actual.VotingPower, "incorrect VotingPower of validator set at height %v", h) if node.Name == "full02" { - // VotingWeight is set VotingPower // `full02` sync to use `light.Block.ValidatorSet` // It's made by `client.ValidatorsAndVoters` (It's not `client.Validators`) // See: Validators and ValidatorsAndVoters in `rpc.core.consensus.go` - require.Equal(t, expected.VotingPower, actual.VotingWeight, - "incorrect VotingWeight of validator set at height %v", h) + // VotingWeight is set VotingPower until sync finished + // After syncing, VotingWeight is set zero + // It's difficult to check the VotingWeight in this test, so skipping } else { - // VotingWeight is set zero require.Equal(t, int64(0), actual.VotingWeight, "incorrect VotingWeight of validator set at height %v", h) } diff --git a/test/maverick/README.md b/test/maverick/README.md deleted file mode 100644 index 5e86d8e32..000000000 --- a/test/maverick/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# Maverick - -A byzantine node used to test Ostracon consensus against a plethora of different faulty misbehaviors. Designed to easily create new faulty misbehaviors to examine how an Ostracon network reacts to the misbehavior. Can also be used for fuzzy testing with different network arrangements. - -## Misbehaviors - -A misbehavior allows control at the following stages as highlighted by the struct below - -``` -type Misbehavior struct { - String string - - EnterPropose func(cs *State, height int64, round int32) - - EnterPrevote func(cs *State, height int64, round int32) - - EnterPrecommit func(cs *State, height int64, round int32) - - ReceivePrevote func(cs *State, prevote *types.Vote) - - ReceivePrecommit func(cs *State, precommit *types.Vote) - - ReceiveProposal func(cs *State, proposal *types.Proposal) error -} -``` - -At each of these events, the node can exhibit a different misbehavior. To create a new misbehavior define a function that builds off the existing default misbehavior and then overrides one or more of these functions. Then append it to the misbehaviors list so the node recognizes it like so: - -``` -var MisbehaviorList = map[string]Misbehavior{ - "double-prevote": DoublePrevoteMisbehavior(), -} -``` - -## Setup - -The maverick node takes most of the functionality from the existing Ostracon CLI. To install this, in the directory of this readme, run: - -```bash -go build -``` - -Use `maverick init` to initialize a single node and `maverick node` to run it. This will run it normally unless you use the misbehaviors flag as follows: - -```bash -maverick node --proxy_app persistent_kvstore --misbehaviors double-prevote,10 -``` - -This would cause the node to vote twice in every round at height 10. To add more misbehaviors at different heights, append the next misbehavior and height after the first (with comma separation). diff --git a/test/maverick/consensus/misbehavior.go b/test/maverick/consensus/misbehavior.go deleted file mode 100644 index 494caa4d9..000000000 --- a/test/maverick/consensus/misbehavior.go +++ /dev/null @@ -1,401 +0,0 @@ -package consensus - -import ( - "fmt" - - tmcon "github.com/line/ostracon/consensus" - cstypes "github.com/line/ostracon/consensus/types" - tmproto "github.com/line/ostracon/proto/ostracon/types" - "github.com/line/ostracon/types" -) - -// MisbehaviorList encompasses a list of all possible behaviors -var MisbehaviorList = map[string]Misbehavior{ - "double-prevote": DoublePrevoteMisbehavior(), -} - -type Misbehavior struct { - Name string - - EnterPropose func(cs *State, height int64, round int32) - - EnterPrevote func(cs *State, height int64, round int32) - - EnterPrecommit func(cs *State, height int64, round int32) - - ReceivePrevote func(cs *State, prevote *types.Vote) - - ReceivePrecommit func(cs *State, precommit *types.Vote) - - ReceiveProposal func(cs *State, proposal *types.Proposal) error -} - -// BEHAVIORS - -func DefaultMisbehavior() Misbehavior { - return Misbehavior{ - Name: "default", - EnterPropose: defaultEnterPropose, - EnterPrevote: defaultEnterPrevote, - EnterPrecommit: defaultEnterPrecommit, - ReceivePrevote: defaultReceivePrevote, - ReceivePrecommit: defaultReceivePrecommit, - ReceiveProposal: defaultReceiveProposal, - } -} - -// DoublePrevoteMisbehavior will make a node prevote both nil and a block in the same -// height and round. -func DoublePrevoteMisbehavior() Misbehavior { - b := DefaultMisbehavior() - b.Name = "double-prevote" - b.EnterPrevote = func(cs *State, height int64, round int32) { - - // If a block is locked, prevote that. - if cs.LockedBlock != nil { - cs.Logger.Debug("enterPrevote: already locked on a block, prevoting locked block") - cs.signAddVote(tmproto.PrevoteType, cs.LockedBlock.Hash(), cs.LockedBlockParts.Header()) - return - } - - // If ProposalBlock is nil, prevote nil. - if cs.ProposalBlock == nil { - cs.Logger.Debug("enterPrevote: ProposalBlock is nil") - cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) - return - } - - // Validate proposal block - err := cs.blockExec.ValidateBlock(cs.state, round, cs.ProposalBlock) - if err != nil { - // ProposalBlock is invalid, prevote nil. - cs.Logger.Error("enterPrevote: ProposalBlock is invalid", "err", err) - cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) - return - } - - if cs.sw == nil { - cs.Logger.Error("nil switch") - return - } - - prevote, err := cs.signVote(tmproto.PrevoteType, cs.ProposalBlock.Hash(), cs.ProposalBlockParts.Header()) - if err != nil { - cs.Logger.Error("enterPrevote: Unable to sign block", "err", err) - } - - nilPrevote, err := cs.signVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) - if err != nil { - cs.Logger.Error("enterPrevote: Unable to sign block", "err", err) - } - - // add our own vote - cs.sendInternalMessage(msgInfo{&tmcon.VoteMessage{Vote: prevote}, ""}) - - cs.Logger.Info("Sending conflicting votes") - peers := cs.sw.Peers().List() - // there has to be at least two other peers connected else this behavior works normally - for idx, peer := range peers { - if idx%2 == 0 { // sign the proposal block - peer.Send(VoteChannel, tmcon.MustEncode(&tmcon.VoteMessage{Vote: prevote})) - } else { // sign a nil block - peer.Send(VoteChannel, tmcon.MustEncode(&tmcon.VoteMessage{Vote: nilPrevote})) - } - } - } - return b -} - -// DEFAULTS - -func defaultEnterPropose(cs *State, height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - // If we don't get the proposal and all block parts quick enough, enterPrevote - cs.scheduleTimeout(cs.config.Propose(round), height, round, cstypes.RoundStepPropose) - - // Nothing more to do if we're not a validator - if cs.privValidator == nil { - logger.Debug("This node is not a validator") - return - } - logger.Debug("This node is a validator") - - pubKey, err := cs.privValidator.GetPubKey() - if err != nil { - // If this node is a validator & proposer in the currentx round, it will - // miss the opportunity to create a block. - logger.Error("Error on retrival of pubkey", "err", err) - return - } - address := pubKey.Address() - - // if not a validator, we're done - if !cs.Validators.HasAddress(address) { - logger.Debug("This node is not a validator", "addr", address, "vals", cs.Validators) - return - } - - if cs.isProposer(address) { - logger.Debug("enterPropose: our turn to propose", - "proposer", address, - ) - cs.decideProposal(height, round) - } else { - logger.Debug("enterPropose: not our turn to propose", - "proposer", cs.Proposer.Address, - ) - } -} - -func defaultEnterPrevote(cs *State, height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - // If a block is locked, prevote that. - if cs.LockedBlock != nil { - logger.Debug("enterPrevote: already locked on a block, prevoting locked block") - cs.signAddVote(tmproto.PrevoteType, cs.LockedBlock.Hash(), cs.LockedBlockParts.Header()) - return - } - - // If ProposalBlock is nil, prevote nil. - if cs.ProposalBlock == nil { - logger.Debug("enterPrevote: ProposalBlock is nil") - cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) - return - } - - // Validate proposal block - err := cs.blockExec.ValidateBlock(cs.state, round, cs.ProposalBlock) - if err != nil { - // ProposalBlock is invalid, prevote nil. - logger.Error("enterPrevote: ProposalBlock is invalid", "err", err) - cs.signAddVote(tmproto.PrevoteType, nil, types.PartSetHeader{}) - return - } - - // Prevote cs.ProposalBlock - // NOTE: the proposal signature is validated when it is received, - // and the proposal block parts are validated as they are received (against the merkle hash in the proposal) - logger.Debug("enterPrevote: ProposalBlock is valid") - cs.signAddVote(tmproto.PrevoteType, cs.ProposalBlock.Hash(), cs.ProposalBlockParts.Header()) -} - -func defaultEnterPrecommit(cs *State, height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - // check for a polka - blockID, ok := cs.Votes.Prevotes(round).TwoThirdsMajority() - - // If we don't have a polka, we must precommit nil. - if !ok { - if cs.LockedBlock != nil { - logger.Debug("enterPrecommit: no +2/3 prevotes during enterPrecommit while we're locked; precommitting nil") - } else { - logger.Debug("enterPrecommit: no +2/3 prevotes during enterPrecommit; precommitting nil.") - } - cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) - return - } - - // At this point +2/3 prevoted for a particular block or nil. - _ = cs.eventBus.PublishEventPolka(cs.RoundStateEvent()) - - // the latest POLRound should be this round. - polRound, _ := cs.Votes.POLInfo() - if polRound < round { - panic(fmt.Sprintf("This POLRound should be %v but got %v", round, polRound)) - } - - // +2/3 prevoted nil. Unlock and precommit nil. - if len(blockID.Hash) == 0 { - if cs.LockedBlock == nil { - logger.Debug("enterPrecommit: +2/3 prevoted for nil") - } else { - logger.Debug("enterPrecommit: +2/3 prevoted for nil; unlocking") - cs.LockedRound = -1 - cs.LockedBlock = nil - cs.LockedBlockParts = nil - _ = cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()) - } - cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) - return - } - - // At this point, +2/3 prevoted for a particular block. - - // If we're already locked on that block, precommit it, and update the LockedRound - if cs.LockedBlock.HashesTo(blockID.Hash) { - logger.Debug("enterPrecommit: +2/3 prevoted locked block; relocking") - cs.LockedRound = round - _ = cs.eventBus.PublishEventRelock(cs.RoundStateEvent()) - cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) - return - } - - // If +2/3 prevoted for proposal block, stage and precommit it - if cs.ProposalBlock.HashesTo(blockID.Hash) { - logger.Debug("enterPrecommit: +2/3 prevoted proposal block; locking", "hash", blockID.Hash) - // Validate the block. - if err := cs.blockExec.ValidateBlock(cs.state, round, cs.ProposalBlock); err != nil { - panic(fmt.Sprintf("enterPrecommit: +2/3 prevoted for an invalid block: %v", err)) - } - cs.LockedRound = round - cs.LockedBlock = cs.ProposalBlock - cs.LockedBlockParts = cs.ProposalBlockParts - _ = cs.eventBus.PublishEventLock(cs.RoundStateEvent()) - cs.signAddVote(tmproto.PrecommitType, blockID.Hash, blockID.PartSetHeader) - return - } - - // There was a polka in this round for a block we don't have. - // Fetch that block, unlock, and precommit nil. - // The +2/3 prevotes for this round is the POL for our unlock. - logger.Debug("enterPrecommit: +2/3 prevotes for a block we don't have; voting nil", "blockID", blockID) - cs.LockedRound = -1 - cs.LockedBlock = nil - cs.LockedBlockParts = nil - if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { - cs.ProposalBlock = nil - cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) - } - _ = cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()) - cs.signAddVote(tmproto.PrecommitType, nil, types.PartSetHeader{}) -} - -func defaultReceivePrevote(cs *State, vote *types.Vote) { - height := cs.Height - prevotes := cs.Votes.Prevotes(vote.Round) - - // If +2/3 prevotes for a block or nil for *any* round: - if blockID, ok := prevotes.TwoThirdsMajority(); ok { - - // There was a polka! - // If we're locked but this is a recent polka, unlock. - // If it matches our ProposalBlock, update the ValidBlock - - // Unlock if `cs.LockedRound < vote.Round <= cs.Round` - // NOTE: If vote.Round > cs.Round, we'll deal with it when we get to vote.Round - if (cs.LockedBlock != nil) && - (cs.LockedRound < vote.Round) && - (vote.Round <= cs.Round) && - !cs.LockedBlock.HashesTo(blockID.Hash) { - - cs.Logger.Info("Unlocking because of POL.", "lockedRound", cs.LockedRound, "POLRound", vote.Round) - cs.LockedRound = -1 - cs.LockedBlock = nil - cs.LockedBlockParts = nil - _ = cs.eventBus.PublishEventUnlock(cs.RoundStateEvent()) - } - - // Update Valid* if we can. - // NOTE: our proposal block may be nil or not what received a polka.. - if len(blockID.Hash) != 0 && (cs.ValidRound < vote.Round) && (vote.Round == cs.Round) { - - if cs.ProposalBlock.HashesTo(blockID.Hash) { - cs.Logger.Info( - "Updating ValidBlock because of POL.", "validRound", cs.ValidRound, "POLRound", vote.Round) - cs.ValidRound = vote.Round - cs.ValidBlock = cs.ProposalBlock - cs.ValidBlockParts = cs.ProposalBlockParts - } else { - cs.Logger.Info( - "valid block we do not know about; set ProposalBlock=nil", - "proposal", cs.ProposalBlock.Hash(), - "blockID", blockID.Hash, - ) - - // We're getting the wrong block. - cs.ProposalBlock = nil - } - if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { - cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) - } - cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) - _ = cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()) - } - } - - // If +2/3 prevotes for *anything* for future round: - switch { - case cs.Round < vote.Round && prevotes.HasTwoThirdsAny(): - // Round-skip if there is any 2/3+ of votes ahead of us - cs.enterNewRound(height, vote.Round) - case cs.Round == vote.Round && cstypes.RoundStepPrevote <= cs.Step: // current round - blockID, ok := prevotes.TwoThirdsMajority() - if ok && (cs.isProposalComplete() || len(blockID.Hash) == 0) { - cs.enterPrecommit(height, vote.Round) - } else if prevotes.HasTwoThirdsAny() { - cs.enterPrevoteWait(height, vote.Round) - } - case cs.Proposal != nil && 0 <= cs.Proposal.POLRound && cs.Proposal.POLRound == vote.Round: - // If the proposal is now complete, enter prevote of cs.Round. - if cs.isProposalComplete() { - cs.enterPrevote(height, cs.Round) - } - } - -} - -func defaultReceivePrecommit(cs *State, vote *types.Vote) { - height := cs.Height - precommits := cs.Votes.Precommits(vote.Round) - cs.Logger.Info("Added to precommit", "vote", vote, "precommits", precommits.StringShort()) - - blockID, ok := precommits.TwoThirdsMajority() - if ok { - // Executed as TwoThirdsMajority could be from a higher round - cs.enterNewRound(height, vote.Round) - cs.enterPrecommit(height, vote.Round) - if len(blockID.Hash) != 0 { - cs.enterCommit(height, vote.Round) - if cs.config.SkipTimeoutCommit && precommits.HasAll() { - cs.enterNewRound(cs.Height, 0) - } - } else { - cs.enterPrecommitWait(height, vote.Round) - } - } else if cs.Round <= vote.Round && precommits.HasTwoThirdsAny() { - cs.enterNewRound(height, vote.Round) - cs.enterPrecommitWait(height, vote.Round) - } -} - -func defaultReceiveProposal(cs *State, proposal *types.Proposal) error { - // Already have one - // TODO: possibly catch double proposals - if cs.Proposal != nil { - return nil - } - - // Does not apply - if proposal.Height != cs.Height || proposal.Round != cs.Round { - return nil - } - - // Verify POLRound, which must be -1 or in range [0, proposal.Round). - if proposal.POLRound < -1 || - (proposal.POLRound >= 0 && proposal.POLRound >= proposal.Round) { - return ErrInvalidProposalPOLRound - } - - proposer := cs.Validators.SelectProposer(cs.state.LastProofHash, proposal.Height, proposal.Round) - - p := proposal.ToProto() - // Verify signature - if !proposer.PubKey.VerifySignature( - types.ProposalSignBytes(cs.state.ChainID, p), proposal.Signature) { - return ErrInvalidProposalSignature - } - - proposal.Signature = p.Signature - cs.Proposal = proposal - // We don't update cs.ProposalBlockParts if it is already set. - // This happens if we're already in cstypes.RoundStepCommit or if there is a valid block in the current round. - // TODO: We can check if Proposal is for a different block as this is a sign of misbehavior! - if cs.ProposalBlockParts == nil { - cs.ProposalBlockParts = types.NewPartSetFromHeader(proposal.BlockID.PartSetHeader) - } - - cs.Logger.Info("received proposal", "proposal", proposal) - return nil -} diff --git a/test/maverick/consensus/msgs.go b/test/maverick/consensus/msgs.go deleted file mode 100644 index 620fc562c..000000000 --- a/test/maverick/consensus/msgs.go +++ /dev/null @@ -1,115 +0,0 @@ -package consensus - -import ( - "errors" - "fmt" - - tmcon "github.com/line/ostracon/consensus" - cstypes "github.com/line/ostracon/consensus/types" - tmmath "github.com/line/ostracon/libs/math" - "github.com/line/ostracon/p2p" - tmcons "github.com/line/ostracon/proto/ostracon/consensus" - tmproto "github.com/line/ostracon/proto/ostracon/types" - "github.com/line/ostracon/types" -) - -func WALToProto(msg tmcon.WALMessage) (*tmcons.WALMessage, error) { - var pb tmcons.WALMessage - - switch msg := msg.(type) { - case types.EventDataRoundState: - pb = tmcons.WALMessage{ - Sum: &tmcons.WALMessage_EventDataRoundState{ - EventDataRoundState: &tmproto.EventDataRoundState{ - Height: msg.Height, - Round: msg.Round, - Step: msg.Step, - }, - }, - } - case msgInfo: - consMsg, err := tmcon.MsgToProto(msg.Msg) - if err != nil { - return nil, err - } - pb = tmcons.WALMessage{ - Sum: &tmcons.WALMessage_MsgInfo{ - MsgInfo: &tmcons.MsgInfo{ - Msg: *consMsg, - PeerID: string(msg.PeerID), - }, - }, - } - case timeoutInfo: - pb = tmcons.WALMessage{ - Sum: &tmcons.WALMessage_TimeoutInfo{ - TimeoutInfo: &tmcons.TimeoutInfo{ - Duration: msg.Duration, - Height: msg.Height, - Round: msg.Round, - Step: uint32(msg.Step), - }, - }, - } - case tmcon.EndHeightMessage: - pb = tmcons.WALMessage{ - Sum: &tmcons.WALMessage_EndHeight{ - EndHeight: &tmcons.EndHeight{ - Height: msg.Height, - }, - }, - } - default: - return nil, fmt.Errorf("to proto: wal message not recognized: %T", msg) - } - - return &pb, nil -} - -// WALFromProto takes a proto wal message and return a consensus walMessage and error -func WALFromProto(msg *tmcons.WALMessage) (tmcon.WALMessage, error) { - if msg == nil { - return nil, errors.New("nil WAL message") - } - var pb tmcon.WALMessage - - switch msg := msg.Sum.(type) { - case *tmcons.WALMessage_EventDataRoundState: - pb = types.EventDataRoundState{ - Height: msg.EventDataRoundState.Height, - Round: msg.EventDataRoundState.Round, - Step: msg.EventDataRoundState.Step, - } - case *tmcons.WALMessage_MsgInfo: - walMsg, err := tmcon.MsgFromProto(&msg.MsgInfo.Msg) - if err != nil { - return nil, fmt.Errorf("msgInfo from proto error: %w", err) - } - pb = msgInfo{ - Msg: walMsg, - PeerID: p2p.ID(msg.MsgInfo.PeerID), - } - - case *tmcons.WALMessage_TimeoutInfo: - tis, err := tmmath.SafeConvertUint8(int64(msg.TimeoutInfo.Step)) - // deny message based on possible overflow - if err != nil { - return nil, fmt.Errorf("denying message due to possible overflow: %w", err) - } - pb = timeoutInfo{ - Duration: msg.TimeoutInfo.Duration, - Height: msg.TimeoutInfo.Height, - Round: msg.TimeoutInfo.Round, - Step: cstypes.RoundStepType(tis), - } - return pb, nil - case *tmcons.WALMessage_EndHeight: - pb := tmcon.EndHeightMessage{ - Height: msg.EndHeight.Height, - } - return pb, nil - default: - return nil, fmt.Errorf("from proto: wal message not recognized: %T", msg) - } - return pb, nil -} diff --git a/test/maverick/consensus/reactor.go b/test/maverick/consensus/reactor.go deleted file mode 100644 index 0ce2cfe72..000000000 --- a/test/maverick/consensus/reactor.go +++ /dev/null @@ -1,1425 +0,0 @@ -package consensus - -import ( - "errors" - "fmt" - "reflect" - "sync" - "time" - - "github.com/gogo/protobuf/proto" - - tmcon "github.com/line/ostracon/consensus" - cstypes "github.com/line/ostracon/consensus/types" - "github.com/line/ostracon/libs/bits" - tmevents "github.com/line/ostracon/libs/events" - tmjson "github.com/line/ostracon/libs/json" - "github.com/line/ostracon/libs/log" - tmsync "github.com/line/ostracon/libs/sync" - "github.com/line/ostracon/p2p" - tmcons "github.com/line/ostracon/proto/ostracon/consensus" - tmproto "github.com/line/ostracon/proto/ostracon/types" - sm "github.com/line/ostracon/state" - "github.com/line/ostracon/types" - tmtime "github.com/line/ostracon/types/time" -) - -const ( - StateChannel = byte(0x20) - DataChannel = byte(0x21) - VoteChannel = byte(0x22) - VoteSetBitsChannel = byte(0x23) - - maxMsgSize = 1048576 // 1MB; NOTE/TODO: keep in sync with types.PartSet sizes. - - blocksToContributeToBecomeGoodPeer = 10000 - votesToContributeToBecomeGoodPeer = 10000 -) - -//----------------------------------------------------------------------------- - -// Reactor defines a reactor for the consensus service. -type Reactor struct { - p2p.BaseReactor // BaseService + p2p.Switch - - conS *State - - mtx tmsync.RWMutex - waitSync bool - eventBus *types.EventBus - - Metrics *tmcon.Metrics -} - -type ReactorOption func(*Reactor) - -// NewReactor returns a new Reactor with the given -// consensusState. -func NewReactor(consensusState *State, waitSync bool, async bool, recvBufSize int, options ...ReactorOption) *Reactor { - conR := &Reactor{ - conS: consensusState, - waitSync: waitSync, - Metrics: tmcon.NopMetrics(), - } - conR.BaseReactor = *p2p.NewBaseReactor("Consensus", conR, async, recvBufSize) - - for _, option := range options { - option(conR) - } - - return conR -} - -// OnStart implements BaseService by subscribing to events, which later will be -// broadcasted to other peers and starting state if we're not in fast sync. -func (conR *Reactor) OnStart() error { - conR.Logger.Info("Reactor ", "waitSync", conR.WaitSync()) - - // call BaseReactor's OnStart() - err := conR.BaseReactor.OnStart() - if err != nil { - return err - } - - // start routine that computes peer statistics for evaluating peer quality - go conR.peerStatsRoutine() - - conR.subscribeToBroadcastEvents() - - if !conR.WaitSync() { - conR.conS.SetSwitch(conR.Switch) - err := conR.conS.Start() - if err != nil { - return err - } - } - - return nil -} - -// OnStop implements BaseService by unsubscribing from events and stopping -// state. -func (conR *Reactor) OnStop() { - conR.unsubscribeFromBroadcastEvents() - if err := conR.conS.Stop(); err != nil { - conR.Logger.Error("Error stopping consensus state", "err", err) - } - if !conR.WaitSync() { - conR.conS.Wait() - } -} - -// SwitchToConsensus switches from fast_sync mode to consensus mode. -// It resets the state, turns off fast_sync, and starts the consensus state-machine -func (conR *Reactor) SwitchToConsensus(state sm.State, skipWAL bool) { - conR.Logger.Info("SwitchToConsensus") - - // We have no votes, so reconstruct LastCommit from SeenCommit. - if state.LastBlockHeight > 0 { - conR.conS.reconstructLastCommit(state) - } - - // NOTE: The line below causes broadcastNewRoundStepRoutine() to broadcast a - // NewRoundStepMessage. - conR.conS.updateToState(state) - - conR.mtx.Lock() - conR.waitSync = false - conR.mtx.Unlock() - conR.Metrics.FastSyncing.Set(0) - conR.Metrics.StateSyncing.Set(0) - - if skipWAL { - conR.conS.doWALCatchup = false - } - conR.conS.SetSwitch(conR.Switch) - err := conR.conS.Start() - if err != nil { - panic(fmt.Sprintf(`Failed to start consensus state: %v - -conS: -%+v - -conR: -%+v`, err, conR.conS, conR)) - } -} - -// GetChannels implements Reactor -func (conR *Reactor) GetChannels() []*p2p.ChannelDescriptor { - // TODO optimize - return []*p2p.ChannelDescriptor{ - { - ID: StateChannel, - Priority: 6, - SendQueueCapacity: 100, - RecvMessageCapacity: maxMsgSize, - }, - { - ID: DataChannel, // maybe split between gossiping current block and catchup stuff - // once we gossip the whole block there's nothing left to send until next height or round - Priority: 10, - SendQueueCapacity: 100, - RecvBufferCapacity: 50 * 4096, - RecvMessageCapacity: maxMsgSize, - }, - { - ID: VoteChannel, - Priority: 7, - SendQueueCapacity: 100, - RecvBufferCapacity: 100 * 100, - RecvMessageCapacity: maxMsgSize, - }, - { - ID: VoteSetBitsChannel, - Priority: 1, - SendQueueCapacity: 2, - RecvBufferCapacity: 1024, - RecvMessageCapacity: maxMsgSize, - }, - } -} - -// InitPeer implements Reactor by creating a state for the peer. -func (conR *Reactor) InitPeer(peer p2p.Peer) p2p.Peer { - peerState := NewPeerState(peer).SetLogger(conR.Logger) - peer.Set(types.PeerStateKey, peerState) - return peer -} - -// AddPeer implements Reactor by spawning multiple gossiping goroutines for the -// peer. -func (conR *Reactor) AddPeer(peer p2p.Peer) { - if !conR.IsRunning() { - return - } - - peerState, ok := peer.Get(types.PeerStateKey).(*PeerState) - if !ok { - panic(fmt.Sprintf("peer %v has no state", peer)) - } - // Begin routines for this peer. - go conR.gossipDataRoutine(peer, peerState) - go conR.gossipVotesRoutine(peer, peerState) - go conR.queryMaj23Routine(peer, peerState) - - // Send our state to peer. - // If we're fast_syncing, broadcast a RoundStepMessage later upon SwitchToConsensus(). - if !conR.WaitSync() { - conR.sendNewRoundStepMessage(peer) - } -} - -// RemovePeer is a noop. -func (conR *Reactor) RemovePeer(peer p2p.Peer, reason interface{}) { - if !conR.IsRunning() { - return - } - // TODO - // ps, ok := peer.Get(PeerStateKey).(*PeerState) - // if !ok { - // panic(fmt.Sprintf("Peer %v has no state", peer)) - // } - // ps.Disconnect() -} - -// Receive implements Reactor -// NOTE: We process these messages even when we're fast_syncing. -// Messages affect either a peer state or the consensus state. -// Peer state updates can happen in parallel, but processing of -// proposals, block parts, and votes are ordered by the receiveRoutine -// NOTE: blocks on consensus state for proposals, block parts, and votes -func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { - if !conR.IsRunning() { - conR.Logger.Debug("Receive", "src", src, "chId", chID, "bytes", msgBytes) - return - } - - msg, err := decodeMsg(msgBytes) - if err != nil { - conR.Logger.Error("Error decoding message", "src", src, "chId", chID, "err", err) - conR.Switch.StopPeerForError(src, err) - return - } - - if err = msg.ValidateBasic(); err != nil { - conR.Logger.Error("Peer sent us invalid msg", "peer", src, "msg", msg, "err", err) - conR.Switch.StopPeerForError(src, err) - return - } - - conR.Logger.Debug("Receive", "src", src, "chId", chID, "msg", msg) - - // Get peer states - ps, ok := src.Get(types.PeerStateKey).(*PeerState) - if !ok { - panic(fmt.Sprintf("Peer %v has no state", src)) - } - - switch chID { - case StateChannel: - switch msg := msg.(type) { - case *tmcon.NewRoundStepMessage: - conR.conS.mtx.Lock() - initialHeight := conR.conS.state.InitialHeight - conR.conS.mtx.Unlock() - if err = msg.ValidateHeight(initialHeight); err != nil { - conR.Logger.Error("Peer sent us invalid msg", "peer", src, "msg", msg, "err", err) - conR.Switch.StopPeerForError(src, err) - return - } - ps.ApplyNewRoundStepMessage(msg) - case *tmcon.NewValidBlockMessage: - ps.ApplyNewValidBlockMessage(msg) - case *tmcon.HasVoteMessage: - ps.ApplyHasVoteMessage(msg) - case *tmcon.VoteSetMaj23Message: - cs := conR.conS - cs.mtx.Lock() - height, votes := cs.Height, cs.Votes - cs.mtx.Unlock() - if height != msg.Height { - return - } - // Peer claims to have a maj23 for some BlockID at H,R,S, - err := votes.SetPeerMaj23(msg.Round, msg.Type, ps.peer.ID(), msg.BlockID) - if err != nil { - conR.Switch.StopPeerForError(src, err) - return - } - // Respond with a VoteSetBitsMessage showing which votes we have. - // (and consequently shows which we don't have) - var ourVotes *bits.BitArray - switch msg.Type { - case tmproto.PrevoteType: - ourVotes = votes.Prevotes(msg.Round).BitArrayByBlockID(msg.BlockID) - case tmproto.PrecommitType: - ourVotes = votes.Precommits(msg.Round).BitArrayByBlockID(msg.BlockID) - default: - panic("Bad VoteSetBitsMessage field Type. Forgot to add a check in ValidateBasic?") - } - src.TrySend(VoteSetBitsChannel, tmcon.MustEncode(&tmcon.VoteSetBitsMessage{ - Height: msg.Height, - Round: msg.Round, - Type: msg.Type, - BlockID: msg.BlockID, - Votes: ourVotes, - })) - default: - conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg))) - } - - case DataChannel: - if conR.WaitSync() { - conR.Logger.Info("Ignoring message received during sync", "msg", msg) - return - } - switch msg := msg.(type) { - case *tmcon.ProposalMessage: - ps.SetHasProposal(msg.Proposal) - conR.conS.peerMsgQueue <- msgInfo{msg, src.ID()} - case *tmcon.ProposalPOLMessage: - ps.ApplyProposalPOLMessage(msg) - case *tmcon.BlockPartMessage: - ps.SetHasProposalBlockPart(msg.Height, msg.Round, int(msg.Part.Index)) - conR.Metrics.BlockParts.With("peer_id", string(src.ID())).Add(1) - conR.conS.peerMsgQueue <- msgInfo{msg, src.ID()} - default: - conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg))) - } - - case VoteChannel: - if conR.WaitSync() { - conR.Logger.Info("Ignoring message received during sync", "msg", msg) - return - } - switch msg := msg.(type) { - case *tmcon.VoteMessage: - cs := conR.conS - cs.mtx.RLock() - height, voterSize, lastCommitSize := cs.Height, cs.Voters.Size(), cs.LastCommit.Size() - cs.mtx.RUnlock() - ps.EnsureVoteBitArrays(height, voterSize) - ps.EnsureVoteBitArrays(height-1, lastCommitSize) - ps.SetHasVote(msg.Vote) - - cs.peerMsgQueue <- msgInfo{msg, src.ID()} - - default: - // don't punish (leave room for soft upgrades) - conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg))) - } - - case VoteSetBitsChannel: - if conR.WaitSync() { - conR.Logger.Info("Ignoring message received during sync", "msg", msg) - return - } - switch msg := msg.(type) { - case *tmcon.VoteSetBitsMessage: - cs := conR.conS - cs.mtx.Lock() - height, votes := cs.Height, cs.Votes - cs.mtx.Unlock() - - if height == msg.Height { - var ourVotes *bits.BitArray - switch msg.Type { - case tmproto.PrevoteType: - ourVotes = votes.Prevotes(msg.Round).BitArrayByBlockID(msg.BlockID) - case tmproto.PrecommitType: - ourVotes = votes.Precommits(msg.Round).BitArrayByBlockID(msg.BlockID) - default: - panic("Bad VoteSetBitsMessage field Type. Forgot to add a check in ValidateBasic?") - } - ps.ApplyVoteSetBitsMessage(msg, ourVotes) - } else { - ps.ApplyVoteSetBitsMessage(msg, nil) - } - default: - // don't punish (leave room for soft upgrades) - conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg))) - } - - default: - conR.Logger.Error(fmt.Sprintf("Unknown chId %X", chID)) - } -} - -// SetEventBus sets event bus. -func (conR *Reactor) SetEventBus(b *types.EventBus) { - conR.eventBus = b - conR.conS.SetEventBus(b) -} - -// WaitSync returns whether the consensus reactor is waiting for state/fast sync. -func (conR *Reactor) WaitSync() bool { - conR.mtx.RLock() - defer conR.mtx.RUnlock() - return conR.waitSync -} - -//-------------------------------------- - -// subscribeToBroadcastEvents subscribes for new round steps and votes -// using internal pubsub defined on state to broadcast -// them to peers upon receiving. -func (conR *Reactor) subscribeToBroadcastEvents() { - const subscriber = "consensus-reactor" - if err := conR.conS.evsw.AddListenerForEvent(subscriber, types.EventNewRoundStep, - func(data tmevents.EventData) { - conR.broadcastNewRoundStepMessage(data.(*cstypes.RoundState)) - }); err != nil { - conR.Logger.Error("Error adding listener for events", "err", err) - } - - if err := conR.conS.evsw.AddListenerForEvent(subscriber, types.EventValidBlock, - func(data tmevents.EventData) { - conR.broadcastNewValidBlockMessage(data.(*cstypes.RoundState)) - }); err != nil { - conR.Logger.Error("Error adding listener for events", "err", err) - } - - if err := conR.conS.evsw.AddListenerForEvent(subscriber, types.EventVote, - func(data tmevents.EventData) { - conR.broadcastHasVoteMessage(data.(*types.Vote)) - }); err != nil { - conR.Logger.Error("Error adding listener for events", "err", err) - } - -} - -func (conR *Reactor) unsubscribeFromBroadcastEvents() { - const subscriber = "consensus-reactor" - conR.conS.evsw.RemoveListener(subscriber) -} - -func (conR *Reactor) broadcastNewRoundStepMessage(rs *cstypes.RoundState) { - nrsMsg := makeRoundStepMessage(rs) - conR.Switch.Broadcast(StateChannel, tmcon.MustEncode(nrsMsg)) -} - -func (conR *Reactor) broadcastNewValidBlockMessage(rs *cstypes.RoundState) { - csMsg := &tmcon.NewValidBlockMessage{ - Height: rs.Height, - Round: rs.Round, - BlockPartSetHeader: rs.ProposalBlockParts.Header(), - BlockParts: rs.ProposalBlockParts.BitArray(), - IsCommit: rs.Step == cstypes.RoundStepCommit, - } - conR.Switch.Broadcast(StateChannel, tmcon.MustEncode(csMsg)) -} - -// Broadcasts HasVoteMessage to peers that care. -func (conR *Reactor) broadcastHasVoteMessage(vote *types.Vote) { - msg := &tmcon.HasVoteMessage{ - Height: vote.Height, - Round: vote.Round, - Type: vote.Type, - Index: vote.ValidatorIndex, - } - conR.Switch.Broadcast(StateChannel, tmcon.MustEncode(msg)) - /* - // TODO: Make this broadcast more selective. - for _, peer := range conR.Switch.Peers().List() { - ps, ok := peer.Get(PeerStateKey).(*PeerState) - if !ok { - panic(fmt.Sprintf("Peer %v has no state", peer)) - } - prs := ps.GetRoundState() - if prs.Height == vote.Height { - // TODO: Also filter on round? - peer.TrySend(StateChannel, struct{ ConsensusMessage }{msg}) - } else { - // Height doesn't match - // TODO: check a field, maybe CatchupCommitRound? - // TODO: But that requires changing the struct field comment. - } - } - */ -} - -func makeRoundStepMessage(rs *cstypes.RoundState) (nrsMsg *tmcon.NewRoundStepMessage) { - nrsMsg = &tmcon.NewRoundStepMessage{ - Height: rs.Height, - Round: rs.Round, - Step: rs.Step, - SecondsSinceStartTime: int64(time.Since(rs.StartTime).Seconds()), - LastCommitRound: rs.LastCommit.GetRound(), - } - return -} - -func (conR *Reactor) sendNewRoundStepMessage(peer p2p.Peer) { - rs := conR.conS.GetRoundState() - nrsMsg := makeRoundStepMessage(rs) - peer.Send(StateChannel, tmcon.MustEncode(nrsMsg)) -} - -func (conR *Reactor) gossipDataRoutine(peer p2p.Peer, ps *PeerState) { - logger := conR.Logger.With("peer", peer) - -OUTER_LOOP: - for { - // Manage disconnects from self or peer. - if !peer.IsRunning() || !conR.IsRunning() { - logger.Info("Stopping gossipDataRoutine for peer") - return - } - rs := conR.conS.GetRoundState() - prs := ps.GetRoundState() - - // Send proposal Block parts? - if rs.ProposalBlockParts.HasHeader(prs.ProposalBlockPartSetHeader) { - if index, ok := rs.ProposalBlockParts.BitArray().Sub(prs.ProposalBlockParts.Copy()).PickRandom(); ok { - part := rs.ProposalBlockParts.GetPart(index) - msg := &tmcon.BlockPartMessage{ - Height: rs.Height, // This tells peer that this part applies to us. - Round: rs.Round, // This tells peer that this part applies to us. - Part: part, - } - logger.Debug("Sending block part", "height", prs.Height, "round", prs.Round) - if peer.Send(DataChannel, tmcon.MustEncode(msg)) { - ps.SetHasProposalBlockPart(prs.Height, prs.Round, index) - } - continue OUTER_LOOP - } - } - - // If the peer is on a previous height that we have, help catch up. - if (0 < prs.Height) && (prs.Height < rs.Height) && (prs.Height >= conR.conS.blockStore.Base()) { - heightLogger := logger.With("height", prs.Height) - - // if we never received the commit message from the peer, the block parts wont be initialized - if prs.ProposalBlockParts == nil { - blockMeta := conR.conS.blockStore.LoadBlockMeta(prs.Height) - if blockMeta == nil { - heightLogger.Error("Failed to load block meta", - "blockstoreBase", conR.conS.blockStore.Base(), "blockstoreHeight", conR.conS.blockStore.Height()) - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - } else { - ps.InitProposalBlockParts(blockMeta.BlockID.PartSetHeader) - } - // continue the loop since prs is a copy and not effected by this initialization - continue OUTER_LOOP - } - conR.gossipDataForCatchup(heightLogger, rs, prs, ps, peer) - continue OUTER_LOOP - } - - // If height and round don't match, sleep. - if (rs.Height != prs.Height) || (rs.Round != prs.Round) { - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - continue OUTER_LOOP - } - - // By here, height and round match. - // Proposal block parts were already matched and sent if any were wanted. - // (These can match on hash so the round doesn't matter) - // Now consider sending other things, like the Proposal itself. - - // Send Proposal && ProposalPOL BitArray? - if rs.Proposal != nil && !prs.Proposal { - // Proposal: share the proposal metadata with peer. - { - msg := &tmcon.ProposalMessage{Proposal: rs.Proposal} - logger.Debug("Sending proposal", "height", prs.Height, "round", prs.Round) - if peer.Send(DataChannel, tmcon.MustEncode(msg)) { - // NOTE[ZM]: A peer might have received different proposal msg so this Proposal msg will be rejected! - ps.SetHasProposal(rs.Proposal) - } - } - // ProposalPOL: lets peer know which POL votes we have so far. - // Peer must receive ProposalMessage first. - // rs.Proposal was validated, so rs.Proposal.POLRound <= rs.Round, - // so we definitely have rs.Votes.Prevotes(rs.Proposal.POLRound). - if 0 <= rs.Proposal.POLRound { - msg := &tmcon.ProposalPOLMessage{ - Height: rs.Height, - ProposalPOLRound: rs.Proposal.POLRound, - ProposalPOL: rs.Votes.Prevotes(rs.Proposal.POLRound).BitArray(), - } - logger.Debug("Sending POL", "height", prs.Height, "round", prs.Round) - peer.Send(DataChannel, tmcon.MustEncode(msg)) - } - continue OUTER_LOOP - } - - // Nothing to do. Sleep. - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - continue OUTER_LOOP - } -} - -func (conR *Reactor) gossipDataForCatchup(logger log.Logger, rs *cstypes.RoundState, - prs *cstypes.PeerRoundState, ps *PeerState, peer p2p.Peer) { - - if index, ok := prs.ProposalBlockParts.Not().PickRandom(); ok { - // Ensure that the peer's PartSetHeader is correct - blockMeta := conR.conS.blockStore.LoadBlockMeta(prs.Height) - if blockMeta == nil { - logger.Error("Failed to load block meta", "ourHeight", rs.Height, - "blockstoreBase", conR.conS.blockStore.Base(), "blockstoreHeight", conR.conS.blockStore.Height()) - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - return - } else if !blockMeta.BlockID.PartSetHeader.Equals(prs.ProposalBlockPartSetHeader) { - logger.Info("Peer ProposalBlockPartSetHeader mismatch, sleeping", - "blockPartSetHeader", blockMeta.BlockID.PartSetHeader, "peerBlockPartSetHeader", prs.ProposalBlockPartSetHeader) - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - return - } - // Load the part - part := conR.conS.blockStore.LoadBlockPart(prs.Height, index) - if part == nil { - logger.Error("Could not load part", "index", index, - "blockPartSetHeader", blockMeta.BlockID.PartSetHeader, "peerBlockPartSetHeader", prs.ProposalBlockPartSetHeader) - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - return - } - // Send the part - msg := &tmcon.BlockPartMessage{ - Height: prs.Height, // Not our height, so it doesn't matter. - Round: prs.Round, // Not our height, so it doesn't matter. - Part: part, - } - logger.Debug("Sending block part for catchup", "round", prs.Round, "index", index) - if peer.Send(DataChannel, tmcon.MustEncode(msg)) { - ps.SetHasProposalBlockPart(prs.Height, prs.Round, index) - } else { - logger.Debug("Sending block part for catchup failed") - } - return - } - time.Sleep(conR.conS.config.PeerGossipSleepDuration) -} - -func (conR *Reactor) gossipVotesRoutine(peer p2p.Peer, ps *PeerState) { - logger := conR.Logger.With("peer", peer) - - // Simple hack to throttle logs upon sleep. - var sleeping = 0 - -OUTER_LOOP: - for { - // Manage disconnects from self or peer. - if !peer.IsRunning() || !conR.IsRunning() { - logger.Info("Stopping gossipVotesRoutine for peer") - return - } - rs := conR.conS.GetRoundState() - prs := ps.GetRoundState() - - switch sleeping { - case 1: // First sleep - sleeping = 2 - case 2: // No more sleep - sleeping = 0 - } - - // If height matches, then send LastCommit, Prevotes, Precommits. - if rs.Height == prs.Height { - heightLogger := logger.With("height", prs.Height) - if conR.gossipVotesForHeight(heightLogger, rs, prs, ps) { - continue OUTER_LOOP - } - } - - // Special catchup logic. - // If peer is lagging by height 1, send LastCommit. - if prs.Height != 0 && rs.Height == prs.Height+1 { - if ps.PickSendVote(rs.LastCommit) { - logger.Debug("Picked rs.LastCommit to send", "height", prs.Height) - continue OUTER_LOOP - } - } - - // Catchup logic - // If peer is lagging by more than 1, send Commit. - if prs.Height != 0 && rs.Height >= prs.Height+2 && prs.Height >= conR.conS.blockStore.Base() { - // Load the block commit for prs.Height, - // which contains precommit signatures for prs.Height. - if commit := conR.conS.blockStore.LoadBlockCommit(prs.Height); commit != nil { - if ps.PickSendVote(commit) { - logger.Debug("Picked Catchup commit to send", "height", prs.Height) - continue OUTER_LOOP - } - } - } - - if sleeping == 0 { - // We sent nothing. Sleep... - sleeping = 1 - logger.Debug("No votes to send, sleeping", "rs.Height", rs.Height, "prs.Height", prs.Height, - "localPV", rs.Votes.Prevotes(rs.Round).BitArray(), "peerPV", prs.Prevotes, - "localPC", rs.Votes.Precommits(rs.Round).BitArray(), "peerPC", prs.Precommits) - } else if sleeping == 2 { - // Continued sleep... - sleeping = 1 - } - - time.Sleep(conR.conS.config.PeerGossipSleepDuration) - continue OUTER_LOOP - } -} - -func (conR *Reactor) gossipVotesForHeight( - logger log.Logger, - rs *cstypes.RoundState, - prs *cstypes.PeerRoundState, - ps *PeerState, -) bool { - - // If there are lastCommits to send... - if prs.Step == cstypes.RoundStepNewHeight { - if ps.PickSendVote(rs.LastCommit) { - logger.Debug("Picked rs.LastCommit to send") - return true - } - } - // If there are POL prevotes to send... - if prs.Step <= cstypes.RoundStepPropose && prs.Round != -1 && prs.Round <= rs.Round && prs.ProposalPOLRound != -1 { - if polPrevotes := rs.Votes.Prevotes(prs.ProposalPOLRound); polPrevotes != nil { - if ps.PickSendVote(polPrevotes) { - logger.Debug("Picked rs.Prevotes(prs.ProposalPOLRound) to send", - "round", prs.ProposalPOLRound) - return true - } - } - } - // If there are prevotes to send... - if prs.Step <= cstypes.RoundStepPrevoteWait && prs.Round != -1 && prs.Round <= rs.Round { - if ps.PickSendVote(rs.Votes.Prevotes(prs.Round)) { - logger.Debug("Picked rs.Prevotes(prs.Round) to send", "round", prs.Round) - return true - } - } - // If there are precommits to send... - if prs.Step <= cstypes.RoundStepPrecommitWait && prs.Round != -1 && prs.Round <= rs.Round { - if ps.PickSendVote(rs.Votes.Precommits(prs.Round)) { - logger.Debug("Picked rs.Precommits(prs.Round) to send", "round", prs.Round) - return true - } - } - // If there are prevotes to send...Needed because of validBlock mechanism - if prs.Round != -1 && prs.Round <= rs.Round { - if ps.PickSendVote(rs.Votes.Prevotes(prs.Round)) { - logger.Debug("Picked rs.Prevotes(prs.Round) to send", "round", prs.Round) - return true - } - } - // If there are POLPrevotes to send... - if prs.ProposalPOLRound != -1 { - if polPrevotes := rs.Votes.Prevotes(prs.ProposalPOLRound); polPrevotes != nil { - if ps.PickSendVote(polPrevotes) { - logger.Debug("Picked rs.Prevotes(prs.ProposalPOLRound) to send", - "round", prs.ProposalPOLRound) - return true - } - } - } - - return false -} - -// NOTE: `queryMaj23Routine` has a simple crude design since it only comes -// into play for liveness when there's a signature DDoS attack happening. -func (conR *Reactor) queryMaj23Routine(peer p2p.Peer, ps *PeerState) { - logger := conR.Logger.With("peer", peer) - -OUTER_LOOP: - for { - // Manage disconnects from self or peer. - if !peer.IsRunning() || !conR.IsRunning() { - logger.Info("Stopping queryMaj23Routine for peer") - return - } - - // Maybe send Height/Round/Prevotes - { - rs := conR.conS.GetRoundState() - prs := ps.GetRoundState() - if rs.Height == prs.Height { - if maj23, ok := rs.Votes.Prevotes(prs.Round).TwoThirdsMajority(); ok { - peer.TrySend(StateChannel, tmcon.MustEncode(&tmcon.VoteSetMaj23Message{ - Height: prs.Height, - Round: prs.Round, - Type: tmproto.PrevoteType, - BlockID: maj23, - })) - time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration) - } - } - } - - // Maybe send Height/Round/Precommits - { - rs := conR.conS.GetRoundState() - prs := ps.GetRoundState() - if rs.Height == prs.Height { - if maj23, ok := rs.Votes.Precommits(prs.Round).TwoThirdsMajority(); ok { - peer.TrySend(StateChannel, tmcon.MustEncode(&tmcon.VoteSetMaj23Message{ - Height: prs.Height, - Round: prs.Round, - Type: tmproto.PrecommitType, - BlockID: maj23, - })) - time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration) - } - } - } - - // Maybe send Height/Round/ProposalPOL - { - rs := conR.conS.GetRoundState() - prs := ps.GetRoundState() - if rs.Height == prs.Height && prs.ProposalPOLRound >= 0 { - if maj23, ok := rs.Votes.Prevotes(prs.ProposalPOLRound).TwoThirdsMajority(); ok { - peer.TrySend(StateChannel, tmcon.MustEncode(&tmcon.VoteSetMaj23Message{ - Height: prs.Height, - Round: prs.ProposalPOLRound, - Type: tmproto.PrevoteType, - BlockID: maj23, - })) - time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration) - } - } - } - - // Little point sending LastCommitRound/LastCommit, - // These are fleeting and non-blocking. - - // Maybe send Height/CatchupCommitRound/CatchupCommit. - { - prs := ps.GetRoundState() - if prs.CatchupCommitRound != -1 && prs.Height > 0 && prs.Height <= conR.conS.blockStore.Height() && - prs.Height >= conR.conS.blockStore.Base() { - if commit := conR.conS.LoadCommit(prs.Height); commit != nil { - peer.TrySend(StateChannel, tmcon.MustEncode(&tmcon.VoteSetMaj23Message{ - Height: prs.Height, - Round: commit.Round, - Type: tmproto.PrecommitType, - BlockID: commit.BlockID, - })) - time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration) - } - } - } - - time.Sleep(conR.conS.config.PeerQueryMaj23SleepDuration) - - continue OUTER_LOOP - } -} - -func (conR *Reactor) peerStatsRoutine() { - for { - if !conR.IsRunning() { - conR.Logger.Info("Stopping peerStatsRoutine") - return - } - - select { - case msg := <-conR.conS.statsMsgQueue: - // Get peer - peer := conR.Switch.Peers().Get(msg.PeerID) - if peer == nil { - conR.Logger.Debug("Attempt to update stats for non-existent peer", - "peer", msg.PeerID) - continue - } - // Get peer state - ps, ok := peer.Get(types.PeerStateKey).(*PeerState) - if !ok { - panic(fmt.Sprintf("Peer %v has no state", peer)) - } - switch msg.Msg.(type) { - case *tmcon.VoteMessage: - if numVotes := ps.RecordVote(); numVotes%votesToContributeToBecomeGoodPeer == 0 { - conR.Switch.MarkPeerAsGood(peer) - } - case *tmcon.BlockPartMessage: - if numParts := ps.RecordBlockPart(); numParts%blocksToContributeToBecomeGoodPeer == 0 { - conR.Switch.MarkPeerAsGood(peer) - } - } - case <-conR.conS.Quit(): - return - - case <-conR.Quit(): - return - } - } -} - -// String returns a string representation of the Reactor. -// NOTE: For now, it is just a hard-coded string to avoid accessing unprotected shared variables. -// TODO: improve! -func (conR *Reactor) String() string { - // better not to access shared variables - return "ConsensusReactor" // conR.StringIndented("") -} - -// StringIndented returns an indented string representation of the Reactor -func (conR *Reactor) StringIndented(indent string) string { - s := "ConsensusReactor{\n" - s += indent + " " + conR.conS.StringIndented(indent+" ") + "\n" - for _, peer := range conR.Switch.Peers().List() { - ps, ok := peer.Get(types.PeerStateKey).(*PeerState) - if !ok { - panic(fmt.Sprintf("Peer %v has no state", peer)) - } - s += indent + " " + ps.StringIndented(indent+" ") + "\n" - } - s += indent + "}" - return s -} - -// ReactorMetrics sets the metrics -func ReactorMetrics(metrics *tmcon.Metrics) ReactorOption { - return func(conR *Reactor) { conR.Metrics = metrics } -} - -//----------------------------------------------------------------------------- - -var ( - ErrPeerStateHeightRegression = errors.New("error peer state height regression") - ErrPeerStateInvalidStartTime = errors.New("error peer state invalid startTime") -) - -// PeerState contains the known state of a peer, including its connection and -// threadsafe access to its PeerRoundState. -// NOTE: THIS GETS DUMPED WITH rpc/core/consensus.go. -// Be mindful of what you Expose. -type PeerState struct { - peer p2p.Peer - logger log.Logger - - mtx sync.Mutex // NOTE: Modify below using setters, never directly. - PRS cstypes.PeerRoundState `json:"round_state"` // Exposed. - Stats *peerStateStats `json:"stats"` // Exposed. -} - -// peerStateStats holds internal statistics for a peer. -type peerStateStats struct { - Votes int `json:"votes"` - BlockParts int `json:"block_parts"` -} - -func (pss peerStateStats) String() string { - return fmt.Sprintf("peerStateStats{votes: %d, blockParts: %d}", - pss.Votes, pss.BlockParts) -} - -// NewPeerState returns a new PeerState for the given Peer -func NewPeerState(peer p2p.Peer) *PeerState { - return &PeerState{ - peer: peer, - logger: log.NewNopLogger(), - PRS: cstypes.PeerRoundState{ - Round: -1, - ProposalPOLRound: -1, - LastCommitRound: -1, - CatchupCommitRound: -1, - }, - Stats: &peerStateStats{}, - } -} - -// SetLogger allows to set a logger on the peer state. Returns the peer state -// itself. -func (ps *PeerState) SetLogger(logger log.Logger) *PeerState { - ps.logger = logger - return ps -} - -// GetRoundState returns an shallow copy of the PeerRoundState. -// There's no point in mutating it since it won't change PeerState. -func (ps *PeerState) GetRoundState() *cstypes.PeerRoundState { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - prs := ps.PRS // copy - return &prs -} - -// ToJSON returns a json of PeerState. -func (ps *PeerState) ToJSON() ([]byte, error) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - return tmjson.Marshal(ps) -} - -// GetHeight returns an atomic snapshot of the PeerRoundState's height -// used by the mempool to ensure peers are caught up before broadcasting new txs -func (ps *PeerState) GetHeight() int64 { - ps.mtx.Lock() - defer ps.mtx.Unlock() - return ps.PRS.Height -} - -// SetHasProposal sets the given proposal as known for the peer. -func (ps *PeerState) SetHasProposal(proposal *types.Proposal) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.Height != proposal.Height || ps.PRS.Round != proposal.Round { - return - } - - if ps.PRS.Proposal { - return - } - - ps.PRS.Proposal = true - - // ps.PRS.ProposalBlockParts is set due to NewValidBlockMessage - if ps.PRS.ProposalBlockParts != nil { - return - } - - ps.PRS.ProposalBlockPartSetHeader = proposal.BlockID.PartSetHeader - ps.PRS.ProposalBlockParts = bits.NewBitArray(int(proposal.BlockID.PartSetHeader.Total)) - ps.PRS.ProposalPOLRound = proposal.POLRound - ps.PRS.ProposalPOL = nil // Nil until ProposalPOLMessage received. -} - -// InitProposalBlockParts initializes the peer's proposal block parts header and bit array. -func (ps *PeerState) InitProposalBlockParts(partSetHeader types.PartSetHeader) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.ProposalBlockParts != nil { - return - } - - ps.PRS.ProposalBlockPartSetHeader = partSetHeader - ps.PRS.ProposalBlockParts = bits.NewBitArray(int(partSetHeader.Total)) -} - -// SetHasProposalBlockPart sets the given block part index as known for the peer. -func (ps *PeerState) SetHasProposalBlockPart(height int64, round int32, index int) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.Height != height || ps.PRS.Round != round { - return - } - - ps.PRS.ProposalBlockParts.SetIndex(index, true) -} - -// PickSendVote picks a vote and sends it to the peer. -// Returns true if vote was sent. -func (ps *PeerState) PickSendVote(votes types.VoteSetReader) bool { - if vote, ok := ps.PickVoteToSend(votes); ok { - msg := &tmcon.VoteMessage{Vote: vote} - ps.logger.Debug("Sending vote message", "ps", ps, "vote", vote) - if ps.peer.Send(VoteChannel, tmcon.MustEncode(msg)) { - ps.SetHasVote(vote) - return true - } - return false - } - return false -} - -// PickVoteToSend picks a vote to send to the peer. -// Returns true if a vote was picked. -// NOTE: `votes` must be the correct Size() for the Height(). -func (ps *PeerState) PickVoteToSend(votes types.VoteSetReader) (vote *types.Vote, ok bool) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if votes.Size() == 0 { - return nil, false - } - - height, round, votesType, size := - votes.GetHeight(), votes.GetRound(), tmproto.SignedMsgType(votes.Type()), votes.Size() - - // Lazily set data using 'votes'. - if votes.IsCommit() { - ps.ensureCatchupCommitRound(height, round, size) - } - ps.ensureVoteBitArrays(height, size) - - psVotes := ps.getVoteBitArray(height, round, votesType) - if psVotes == nil { - return nil, false // Not something worth sending - } - if index, ok := votes.BitArray().Sub(psVotes).PickRandom(); ok { - return votes.GetByIndex(int32(index)), true - } - return nil, false -} - -func (ps *PeerState) getVoteBitArray(height int64, round int32, votesType tmproto.SignedMsgType) *bits.BitArray { - if !types.IsVoteTypeValid(votesType) { - return nil - } - - if ps.PRS.Height == height { - if ps.PRS.Round == round { - switch votesType { - case tmproto.PrevoteType: - return ps.PRS.Prevotes - case tmproto.PrecommitType: - return ps.PRS.Precommits - } - } - if ps.PRS.CatchupCommitRound == round { - switch votesType { - case tmproto.PrevoteType: - return nil - case tmproto.PrecommitType: - return ps.PRS.CatchupCommit - } - } - if ps.PRS.ProposalPOLRound == round { - switch votesType { - case tmproto.PrevoteType: - return ps.PRS.ProposalPOL - case tmproto.PrecommitType: - return nil - } - } - return nil - } - if ps.PRS.Height == height+1 { - if ps.PRS.LastCommitRound == round { - switch votesType { - case tmproto.PrevoteType: - return nil - case tmproto.PrecommitType: - return ps.PRS.LastCommit - } - } - return nil - } - return nil -} - -// 'round': A round for which we have a +2/3 commit. -func (ps *PeerState) ensureCatchupCommitRound(height int64, round int32, numVoters int) { - if ps.PRS.Height != height { - return - } - /* - NOTE: This is wrong, 'round' could change. - e.g. if orig round is not the same as block LastCommit round. - if ps.CatchupCommitRound != -1 && ps.CatchupCommitRound != round { - panic(fmt.Sprintf( - "Conflicting CatchupCommitRound. Height: %v, - Orig: %v, - New: %v", - height, - ps.CatchupCommitRound, - round)) - } - */ - if ps.PRS.CatchupCommitRound == round { - return // Nothing to do! - } - ps.PRS.CatchupCommitRound = round - if round == ps.PRS.Round { - ps.PRS.CatchupCommit = ps.PRS.Precommits - } else { - ps.PRS.CatchupCommit = bits.NewBitArray(numVoters) - } -} - -// EnsureVoteBitArrays ensures the bit-arrays have been allocated for tracking -// what votes this peer has received. -// NOTE: It's important to make sure that numVoters actually matches -// what the node sees as the number of voters for height. -func (ps *PeerState) EnsureVoteBitArrays(height int64, numVoters int) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - ps.ensureVoteBitArrays(height, numVoters) -} - -func (ps *PeerState) ensureVoteBitArrays(height int64, numVoters int) { - if ps.PRS.Height == height { - if ps.PRS.Prevotes == nil { - ps.PRS.Prevotes = bits.NewBitArray(numVoters) - } - if ps.PRS.Precommits == nil { - ps.PRS.Precommits = bits.NewBitArray(numVoters) - } - if ps.PRS.CatchupCommit == nil { - ps.PRS.CatchupCommit = bits.NewBitArray(numVoters) - } - if ps.PRS.ProposalPOL == nil { - ps.PRS.ProposalPOL = bits.NewBitArray(numVoters) - } - } else if ps.PRS.Height == height+1 { - if ps.PRS.LastCommit == nil { - ps.PRS.LastCommit = bits.NewBitArray(numVoters) - } - } -} - -// RecordVote increments internal votes related statistics for this peer. -// It returns the total number of added votes. -func (ps *PeerState) RecordVote() int { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - ps.Stats.Votes++ - - return ps.Stats.Votes -} - -// VotesSent returns the number of blocks for which peer has been sending us -// votes. -func (ps *PeerState) VotesSent() int { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - return ps.Stats.Votes -} - -// RecordBlockPart increments internal block part related statistics for this peer. -// It returns the total number of added block parts. -func (ps *PeerState) RecordBlockPart() int { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - ps.Stats.BlockParts++ - return ps.Stats.BlockParts -} - -// BlockPartsSent returns the number of useful block parts the peer has sent us. -func (ps *PeerState) BlockPartsSent() int { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - return ps.Stats.BlockParts -} - -// SetHasVote sets the given vote as known by the peer -func (ps *PeerState) SetHasVote(vote *types.Vote) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - ps.setHasVote(vote.Height, vote.Round, vote.Type, vote.ValidatorIndex) -} - -func (ps *PeerState) setHasVote(height int64, round int32, voteType tmproto.SignedMsgType, index int32) { - logger := ps.logger.With( - "peerH/R", - fmt.Sprintf("%d/%d", ps.PRS.Height, ps.PRS.Round), - "H/R", - fmt.Sprintf("%d/%d", height, round)) - logger.Debug("setHasVote", "type", voteType, "index", index) - - // NOTE: some may be nil BitArrays -> no side effects. - psVotes := ps.getVoteBitArray(height, round, voteType) - if psVotes != nil { - psVotes.SetIndex(int(index), true) - } -} - -// ApplyNewRoundStepMessage updates the peer state for the new round. -func (ps *PeerState) ApplyNewRoundStepMessage(msg *tmcon.NewRoundStepMessage) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - // Ignore duplicates or decreases - if CompareHRS(msg.Height, msg.Round, msg.Step, ps.PRS.Height, ps.PRS.Round, ps.PRS.Step) <= 0 { - return - } - - // Just remember these values. - psHeight := ps.PRS.Height - psRound := ps.PRS.Round - psCatchupCommitRound := ps.PRS.CatchupCommitRound - psCatchupCommit := ps.PRS.CatchupCommit - - startTime := tmtime.Now().Add(-1 * time.Duration(msg.SecondsSinceStartTime) * time.Second) - ps.PRS.Height = msg.Height - ps.PRS.Round = msg.Round - ps.PRS.Step = msg.Step - ps.PRS.StartTime = startTime - if psHeight != msg.Height || psRound != msg.Round { - ps.PRS.Proposal = false - ps.PRS.ProposalBlockPartSetHeader = types.PartSetHeader{} - ps.PRS.ProposalBlockParts = nil - ps.PRS.ProposalPOLRound = -1 - ps.PRS.ProposalPOL = nil - // We'll update the BitArray capacity later. - ps.PRS.Prevotes = nil - ps.PRS.Precommits = nil - } - if psHeight == msg.Height && psRound != msg.Round && msg.Round == psCatchupCommitRound { - // Peer caught up to CatchupCommitRound. - // Preserve psCatchupCommit! - // NOTE: We prefer to use prs.Precommits if - // pr.Round matches pr.CatchupCommitRound. - ps.PRS.Precommits = psCatchupCommit - } - if psHeight != msg.Height { - // Shift Precommits to LastCommit. - if psHeight+1 == msg.Height && psRound == msg.LastCommitRound { - ps.PRS.LastCommitRound = msg.LastCommitRound - ps.PRS.LastCommit = ps.PRS.Precommits - } else { - ps.PRS.LastCommitRound = msg.LastCommitRound - ps.PRS.LastCommit = nil - } - // We'll update the BitArray capacity later. - ps.PRS.CatchupCommitRound = -1 - ps.PRS.CatchupCommit = nil - } -} - -// ApplyNewValidBlockMessage updates the peer state for the new valid block. -func (ps *PeerState) ApplyNewValidBlockMessage(msg *tmcon.NewValidBlockMessage) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.Height != msg.Height { - return - } - - if ps.PRS.Round != msg.Round && !msg.IsCommit { - return - } - - ps.PRS.ProposalBlockPartSetHeader = msg.BlockPartSetHeader - ps.PRS.ProposalBlockParts = msg.BlockParts -} - -// ApplyProposalPOLMessage updates the peer state for the new proposal POL. -func (ps *PeerState) ApplyProposalPOLMessage(msg *tmcon.ProposalPOLMessage) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.Height != msg.Height { - return - } - if ps.PRS.ProposalPOLRound != msg.ProposalPOLRound { - return - } - - // TODO: Merge onto existing ps.PRS.ProposalPOL? - // We might have sent some prevotes in the meantime. - ps.PRS.ProposalPOL = msg.ProposalPOL -} - -// ApplyHasVoteMessage updates the peer state for the new vote. -func (ps *PeerState) ApplyHasVoteMessage(msg *tmcon.HasVoteMessage) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - if ps.PRS.Height != msg.Height { - return - } - - ps.setHasVote(msg.Height, msg.Round, msg.Type, msg.Index) -} - -// ApplyVoteSetBitsMessage updates the peer state for the bit-array of votes -// it claims to have for the corresponding BlockID. -// `ourVotes` is a BitArray of votes we have for msg.BlockID -// NOTE: if ourVotes is nil (e.g. msg.Height < rs.Height), -// we conservatively overwrite ps's votes w/ msg.Votes. -func (ps *PeerState) ApplyVoteSetBitsMessage(msg *tmcon.VoteSetBitsMessage, ourVotes *bits.BitArray) { - ps.mtx.Lock() - defer ps.mtx.Unlock() - - votes := ps.getVoteBitArray(msg.Height, msg.Round, msg.Type) - if votes != nil { - if ourVotes == nil { - votes.Update(msg.Votes) - } else { - otherVotes := votes.Sub(ourVotes) - hasVotes := otherVotes.Or(msg.Votes) - votes.Update(hasVotes) - } - } -} - -// String returns a string representation of the PeerState -func (ps *PeerState) String() string { - return ps.StringIndented("") -} - -// StringIndented returns a string representation of the PeerState -func (ps *PeerState) StringIndented(indent string) string { - ps.mtx.Lock() - defer ps.mtx.Unlock() - return fmt.Sprintf(`PeerState{ -%s Key %v -%s RoundState %v -%s Stats %v -%s}`, - indent, ps.peer.ID(), - indent, ps.PRS.StringIndented(indent+" "), - indent, ps.Stats, - indent) -} - -//----------------------------------------------------------------------------- - -// func init() { -// tmjson.RegisterType(&NewRoundStepMessage{}, "ostracon/NewRoundStepMessage") -// tmjson.RegisterType(&NewValidBlockMessage{}, "ostracon/NewValidBlockMessage") -// tmjson.RegisterType(&ProposalMessage{}, "ostracon/Proposal") -// tmjson.RegisterType(&ProposalPOLMessage{}, "ostracon/ProposalPOL") -// tmjson.RegisterType(&BlockPartMessage{}, "ostracon/BlockPart") -// tmjson.RegisterType(&VoteMessage{}, "ostracon/Vote") -// tmjson.RegisterType(&HasVoteMessage{}, "ostracon/HasVote") -// tmjson.RegisterType(&VoteSetMaj23Message{}, "ostracon/VoteSetMaj23") -// tmjson.RegisterType(&VoteSetBitsMessage{}, "ostracon/VoteSetBits") -// } - -func decodeMsg(bz []byte) (msg tmcon.Message, err error) { - pb := &tmcons.Message{} - if err = proto.Unmarshal(bz, pb); err != nil { - return msg, err - } - - return tmcon.MsgFromProto(pb) -} diff --git a/test/maverick/consensus/replay.go b/test/maverick/consensus/replay.go deleted file mode 100644 index dd182b12c..000000000 --- a/test/maverick/consensus/replay.go +++ /dev/null @@ -1,551 +0,0 @@ -package consensus - -import ( - "bytes" - "fmt" - "hash/crc32" - "io" - "reflect" - "time" - - abci "github.com/line/ostracon/abci/types" - tmcon "github.com/line/ostracon/consensus" - "github.com/line/ostracon/crypto/merkle" - "github.com/line/ostracon/libs/log" - "github.com/line/ostracon/proxy" - sm "github.com/line/ostracon/state" - "github.com/line/ostracon/types" -) - -var crc32c = crc32.MakeTable(crc32.Castagnoli) - -// Functionality to replay blocks and messages on recovery from a crash. -// There are two general failure scenarios: -// -// 1. failure during consensus -// 2. failure while applying the block -// -// The former is handled by the WAL, the latter by the proxyApp Handshake on -// restart, which ultimately hands off the work to the WAL. - -//----------------------------------------- -// 1. Recover from failure during consensus -// (by replaying messages from the WAL) -//----------------------------------------- - -// Unmarshal and apply a single message to the consensus state as if it were -// received in receiveRoutine. Lines that start with "#" are ignored. -// NOTE: receiveRoutine should not be running. -func (cs *State) readReplayMessage(msg *tmcon.TimedWALMessage, newStepSub types.Subscription) error { - // Skip meta messages which exist for demarcating boundaries. - if _, ok := msg.Msg.(tmcon.EndHeightMessage); ok { - return nil - } - - // for logging - switch m := msg.Msg.(type) { - case types.EventDataRoundState: - cs.Logger.Info("Replay: New Step", "height", m.Height, "round", m.Round, "step", m.Step) - // these are playback checks - ticker := time.After(time.Second * 2) - if newStepSub != nil { - select { - case stepMsg := <-newStepSub.Out(): - m2 := stepMsg.Data().(types.EventDataRoundState) - if m.Height != m2.Height || m.Round != m2.Round || m.Step != m2.Step { - return fmt.Errorf("roundState mismatch. Got %v; Expected %v", m2, m) - } - case <-newStepSub.Cancelled(): - return fmt.Errorf("failed to read off newStepSub.Out(). newStepSub was cancelled") - case <-ticker: - return fmt.Errorf("failed to read off newStepSub.Out()") - } - } - case msgInfo: - peerID := m.PeerID - if peerID == "" { - peerID = "local" - } - switch msg := m.Msg.(type) { - case *tmcon.ProposalMessage: - p := msg.Proposal - cs.Logger.Info("Replay: Proposal", "height", p.Height, "round", p.Round, "header", - p.BlockID.PartSetHeader, "pol", p.POLRound, "peer", peerID) - case *tmcon.BlockPartMessage: - cs.Logger.Info("Replay: BlockPart", "height", msg.Height, "round", msg.Round, "peer", peerID) - case *tmcon.VoteMessage: - v := msg.Vote - cs.Logger.Info("Replay: Vote", "height", v.Height, "round", v.Round, "type", v.Type, - "blockID", v.BlockID, "peer", peerID) - } - - cs.handleMsg(m) - case timeoutInfo: - cs.Logger.Info("Replay: Timeout", "height", m.Height, "round", m.Round, "step", m.Step, "dur", m.Duration) - cs.handleTimeout(m, cs.RoundState) - default: - return fmt.Errorf("replay: Unknown TimedWALMessage type: %v", reflect.TypeOf(msg.Msg)) - } - return nil -} - -// Replay only those messages since the last block. `timeoutRoutine` should -// run concurrently to read off tickChan. -func (cs *State) catchupReplay(csHeight int64) error { - - // Set replayMode to true so we don't log signing errors. - cs.replayMode = true - defer func() { cs.replayMode = false }() - - // Ensure that #ENDHEIGHT for this height doesn't exist. - // NOTE: This is just a sanity check. As far as we know things work fine - // without it, and Handshake could reuse State if it weren't for - // this check (since we can crash after writing #ENDHEIGHT). - // - // Ignore data corruption errors since this is a sanity check. - gr, found, err := cs.wal.SearchForEndHeight(csHeight, &tmcon.WALSearchOptions{IgnoreDataCorruptionErrors: true}) - if err != nil { - return err - } - if gr != nil { - if err := gr.Close(); err != nil { - return err - } - } - if found { - return fmt.Errorf("wal should not contain #ENDHEIGHT %d", csHeight) - } - - // Search for last height marker. - // - // Ignore data corruption errors in previous heights because we only care about last height - if csHeight < cs.state.InitialHeight { - return fmt.Errorf("cannot replay height %v, below initial height %v", csHeight, cs.state.InitialHeight) - } - endHeight := csHeight - 1 - if csHeight == cs.state.InitialHeight { - endHeight = 0 - } - gr, found, err = cs.wal.SearchForEndHeight(endHeight, &tmcon.WALSearchOptions{IgnoreDataCorruptionErrors: true}) - if err == io.EOF { - cs.Logger.Error("Replay: wal.group.Search returned EOF", "#ENDHEIGHT", endHeight) - } else if err != nil { - return err - } - if !found { - return fmt.Errorf("cannot replay height %d. WAL does not contain #ENDHEIGHT for %d", csHeight, endHeight) - } - defer gr.Close() - - cs.Logger.Info("Catchup by replaying consensus messages", "height", csHeight) - - var msg *tmcon.TimedWALMessage - dec := WALDecoder{gr} - -LOOP: - for { - msg, err = dec.Decode() - switch { - case err == io.EOF: - break LOOP - case IsDataCorruptionError(err): - cs.Logger.Error("data has been corrupted in last height of consensus WAL", "err", err, "height", csHeight) - return err - case err != nil: - return err - } - - // NOTE: since the priv key is set when the msgs are received - // it will attempt to eg double sign but we can just ignore it - // since the votes will be replayed and we'll get to the next step - if err := cs.readReplayMessage(msg, nil); err != nil { - return err - } - } - cs.Logger.Info("Replay: Done") - return nil -} - -//-------------------------------------------------------------------------------- - -// Parses marker lines of the form: -// #ENDHEIGHT: 12345 -/* -func makeHeightSearchFunc(height int64) auto.SearchFunc { - return func(line string) (int, error) { - line = strings.TrimRight(line, "\n") - parts := strings.Split(line, " ") - if len(parts) != 2 { - return -1, errors.New("line did not have 2 parts") - } - i, err := strconv.Atoi(parts[1]) - if err != nil { - return -1, errors.New("failed to parse INFO: " + err.Error()) - } - if height < i { - return 1, nil - } else if height == i { - return 0, nil - } else { - return -1, nil - } - } -}*/ - -//--------------------------------------------------- -// 2. Recover from failure while applying the block. -// (by handshaking with the app to figure out where -// we were last, and using the WAL to recover there.) -//--------------------------------------------------- - -type Handshaker struct { - stateStore sm.Store - initialState sm.State - store sm.BlockStore - eventBus types.BlockEventPublisher - genDoc *types.GenesisDoc - logger log.Logger - - nBlocks int // number of blocks applied to the state -} - -func NewHandshaker(stateStore sm.Store, state sm.State, - store sm.BlockStore, genDoc *types.GenesisDoc) *Handshaker { - - return &Handshaker{ - stateStore: stateStore, - initialState: state, - store: store, - eventBus: types.NopEventBus{}, - genDoc: genDoc, - logger: log.NewNopLogger(), - nBlocks: 0, - } -} - -func (h *Handshaker) SetLogger(l log.Logger) { - h.logger = l -} - -// SetEventBus - sets the event bus for publishing block related events. -// If not called, it defaults to types.NopEventBus. -func (h *Handshaker) SetEventBus(eventBus types.BlockEventPublisher) { - h.eventBus = eventBus -} - -// NBlocks returns the number of blocks applied to the state. -func (h *Handshaker) NBlocks() int { - return h.nBlocks -} - -// TODO: retry the handshake/replay if it fails ? -func (h *Handshaker) Handshake(proxyApp proxy.AppConns) error { - - // Handshake is done via ABCI Info on the query conn. - res, err := proxyApp.Query().InfoSync(proxy.RequestInfo) - if err != nil { - return fmt.Errorf("error calling Info: %v", err) - } - - blockHeight := res.LastBlockHeight - if blockHeight < 0 { - return fmt.Errorf("got a negative last block height (%d) from the app", blockHeight) - } - appHash := res.LastBlockAppHash - - h.logger.Info("ABCI Handshake App Info", - "height", blockHeight, - "hash", appHash, - "software-version", res.Version, - "protocol-version", res.AppVersion, - ) - - // Only set the version if there is no existing state. - if h.initialState.LastBlockHeight == 0 { - h.initialState.ConsensusParams.Version.AppVersion = res.AppVersion - h.initialState.Version.Consensus.App = res.AppVersion - } - - // Replay blocks up to the latest in the blockstore. - _, err = h.ReplayBlocks(h.initialState, appHash, blockHeight, proxyApp) - if err != nil { - return fmt.Errorf("error on replay: %v", err) - } - - h.logger.Info("Completed ABCI Handshake - Ostracon and App are synced", - "appHeight", blockHeight, "appHash", appHash) - - // TODO: (on restart) replay mempool - - return nil -} - -// ReplayBlocks replays all blocks since appBlockHeight and ensures the result -// matches the current state. -// Returns the final AppHash or an error. -func (h *Handshaker) ReplayBlocks( - state sm.State, - appHash []byte, - appBlockHeight int64, - proxyApp proxy.AppConns, -) ([]byte, error) { - storeBlockBase := h.store.Base() - storeBlockHeight := h.store.Height() - stateBlockHeight := state.LastBlockHeight - h.logger.Info( - "ABCI Replay Blocks", - "appHeight", - appBlockHeight, - "storeHeight", - storeBlockHeight, - "stateHeight", - stateBlockHeight) - - // If appBlockHeight == 0 it means that we are at genesis and hence should send InitChain. - if appBlockHeight == 0 { - validators := make([]*types.Validator, len(h.genDoc.Validators)) - for i, val := range h.genDoc.Validators { - validators[i] = types.NewValidator(val.PubKey, val.Power) - } - validatorSet := types.NewValidatorSet(validators) - nextVals := types.OC2PB.ValidatorUpdates(validatorSet) - csParams := types.OC2PB.ConsensusParams(h.genDoc.ConsensusParams) - req := abci.RequestInitChain{ - Time: h.genDoc.GenesisTime, - ChainId: h.genDoc.ChainID, - InitialHeight: h.genDoc.InitialHeight, - ConsensusParams: csParams, - Validators: nextVals, // ValidatorOrVoter: validator - AppStateBytes: h.genDoc.AppState, - } - res, err := proxyApp.Consensus().InitChainSync(req) - if err != nil { - return nil, err - } - - appHash = res.AppHash - - if stateBlockHeight == 0 { // we only update state when we are in initial state - // If the app did not return an app hash, we keep the one set from the genesis doc in - // the state. We don't set appHash since we don't want the genesis doc app hash - // recorded in the genesis block. We should probably just remove GenesisDoc.AppHash. - if len(res.AppHash) > 0 { - state.AppHash = res.AppHash - } - // If the app returned validators or consensus params, update the state. - if len(res.Validators) > 0 { - vals, err := types.PB2OC.ValidatorUpdates(res.Validators) - if err != nil { - return nil, err - } - state.Validators = types.NewValidatorSet(vals) - state.Voters = types.SelectVoter(state.Validators, h.genDoc.Hash(), state.VoterParams) - // Should sync it with MakeGenesisState() - state.NextValidators = types.NewValidatorSet(vals) - } else if len(h.genDoc.Validators) == 0 { - // If validator set is not set in genesis and still empty after InitChain, exit. - return nil, fmt.Errorf("validator set is nil in genesis and still empty after InitChain") - } - - if res.ConsensusParams != nil { - state.ConsensusParams = types.UpdateConsensusParams(state.ConsensusParams, res.ConsensusParams) - state.Version.Consensus.App = state.ConsensusParams.Version.AppVersion - } - // We update the last results hash with the empty hash, to conform with RFC-6962. - state.LastResultsHash = merkle.HashFromByteSlices(nil) - if err := h.stateStore.Save(state); err != nil { - return nil, err - } - } - } - - // First handle edge cases and constraints on the storeBlockHeight and storeBlockBase. - switch { - case storeBlockHeight == 0: - assertAppHashEqualsOneFromState(appHash, state) - return appHash, nil - - case appBlockHeight == 0 && state.InitialHeight < storeBlockBase: - // the app has no state, and the block store is truncated above the initial height - return appHash, sm.ErrAppBlockHeightTooLow{AppHeight: appBlockHeight, StoreBase: storeBlockBase} - - case appBlockHeight > 0 && appBlockHeight < storeBlockBase-1: - // the app is too far behind truncated store (can be 1 behind since we replay the next) - return appHash, sm.ErrAppBlockHeightTooLow{AppHeight: appBlockHeight, StoreBase: storeBlockBase} - - case storeBlockHeight < appBlockHeight: - // the app should never be ahead of the store (but this is under app's control) - return appHash, sm.ErrAppBlockHeightTooHigh{CoreHeight: storeBlockHeight, AppHeight: appBlockHeight} - - case storeBlockHeight < stateBlockHeight: - // the state should never be ahead of the store (this is under ostracon's control) - panic(fmt.Sprintf("StateBlockHeight (%d) > StoreBlockHeight (%d)", stateBlockHeight, storeBlockHeight)) - - case storeBlockHeight > stateBlockHeight+1: - // store should be at most one ahead of the state (this is under ostracon's control) - panic(fmt.Sprintf("StoreBlockHeight (%d) > StateBlockHeight + 1 (%d)", storeBlockHeight, stateBlockHeight+1)) - } - - var err error - // Now either store is equal to state, or one ahead. - // For each, consider all cases of where the app could be, given app <= store - if storeBlockHeight == stateBlockHeight { - // Ostracon ran Commit and saved the state. - // Either the app is asking for replay, or we're all synced up. - if appBlockHeight < storeBlockHeight { - // the app is behind, so replay blocks, but no need to go through WAL (state is already synced to store) - return h.replayBlocks(state, proxyApp, appBlockHeight, storeBlockHeight, false) - - } else if appBlockHeight == storeBlockHeight { - // We're good! - assertAppHashEqualsOneFromState(appHash, state) - return appHash, nil - } - - } else if storeBlockHeight == stateBlockHeight+1 { - // We saved the block in the store but haven't updated the state, - // so we'll need to replay a block using the WAL. - switch { - case appBlockHeight < stateBlockHeight: - // the app is further behind than it should be, so replay blocks - // but leave the last block to go through the WAL - return h.replayBlocks(state, proxyApp, appBlockHeight, storeBlockHeight, true) - - case appBlockHeight == stateBlockHeight: - // We haven't run Commit (both the state and app are one block behind), - // so replayBlock with the real app. - // NOTE: We could instead use the cs.WAL on cs.Start, - // but we'd have to allow the WAL to replay a block that wrote it's #ENDHEIGHT - h.logger.Info("Replay last block using real app") - state, err = h.replayBlock(state, storeBlockHeight, proxyApp.Consensus()) - return state.AppHash, err - - case appBlockHeight == storeBlockHeight: - // We ran Commit, but didn't save the state, so replayBlock with mock app. - abciResponses, err := h.stateStore.LoadABCIResponses(storeBlockHeight) - if err != nil { - return nil, err - } - mockApp := newMockProxyApp(appHash, abciResponses) - h.logger.Info("Replay last block using mock app") - state, err = h.replayBlock(state, storeBlockHeight, mockApp) - return state.AppHash, err - } - - } - - panic(fmt.Sprintf("uncovered case! appHeight: %d, storeHeight: %d, stateHeight: %d", - appBlockHeight, storeBlockHeight, stateBlockHeight)) -} - -func (h *Handshaker) replayBlocks( - state sm.State, - proxyApp proxy.AppConns, - appBlockHeight, - storeBlockHeight int64, - mutateState bool) ([]byte, error) { - // App is further behind than it should be, so we need to replay blocks. - // We replay all blocks from appBlockHeight+1. - // - // Note that we don't have an old version of the state, - // so we by-pass state validation/mutation using sm.ExecCommitBlock. - // This also means we won't be saving validator sets if they change during this period. - // TODO: Load the historical information to fix this and just use state.ApplyBlock - // - // If mutateState == true, the final block is replayed with h.replayBlock() - - var appHash []byte - var err error - finalBlock := storeBlockHeight - if mutateState { - finalBlock-- - } - firstBlock := appBlockHeight + 1 - if firstBlock == 1 { - firstBlock = state.InitialHeight - } - for i := firstBlock; i <= finalBlock; i++ { - h.logger.Info("Applying block", "height", i) - block := h.store.LoadBlock(i) - // Extra check to ensure the app was not changed in a way it shouldn't have. - if len(appHash) > 0 { - assertAppHashEqualsOneFromBlock(appHash, block) - } - - appHash, err = sm.ExecCommitBlock( - proxyApp.Consensus(), - block, - h.logger, - h.stateStore, - h.genDoc.InitialHeight, - state.VoterParams, - ) - if err != nil { - return nil, err - } - - h.nBlocks++ - } - - if mutateState { - // sync the final block - h.logger.Info("Replaying final block using real app", "height", storeBlockHeight) - state, err = h.replayBlock(state, storeBlockHeight, proxyApp.Consensus()) - if err != nil { - return nil, err - } - appHash = state.AppHash - } - - assertAppHashEqualsOneFromState(appHash, state) - return appHash, nil -} - -// ApplyBlock on the proxyApp with the last block. -func (h *Handshaker) replayBlock(state sm.State, height int64, proxyApp proxy.AppConnConsensus) (sm.State, error) { - block := h.store.LoadBlock(height) - meta := h.store.LoadBlockMeta(height) - var err error - consensusParams, err := h.stateStore.LoadConsensusParams(height) - if err != nil { - return sm.State{}, err - } - state.ConsensusParams = consensusParams - state.Version.Consensus.App = consensusParams.Version.AppVersion - - // Use stubs for both mempool and evidence pool since no transactions nor - // evidence are needed here - block already exists. - blockExec := sm.NewBlockExecutor(h.stateStore, h.logger, proxyApp, emptyMempool{}, sm.EmptyEvidencePool{}) - blockExec.SetEventBus(h.eventBus) - - state, _, err = blockExec.ApplyBlock(state, meta.BlockID, block, nil) - if err != nil { - return sm.State{}, err - } - - h.nBlocks++ - - return state, nil -} - -func assertAppHashEqualsOneFromBlock(appHash []byte, block *types.Block) { - if !bytes.Equal(appHash, block.AppHash) { - panic(fmt.Sprintf(`block.AppHash does not match AppHash after replay. Got %X, expected %X. - -Block: %v -`, - appHash, block.AppHash, block)) - } -} - -func assertAppHashEqualsOneFromState(appHash []byte, state sm.State) { - if !bytes.Equal(appHash, state.AppHash) { - panic(fmt.Sprintf(`state.AppHash does not match AppHash after replay. Got -%X, expected %X. - -State: %v - -Did you reset Ostracon without resetting your application's data?`, - appHash, state.AppHash, state)) - } -} diff --git a/test/maverick/consensus/replay_file.go b/test/maverick/consensus/replay_file.go deleted file mode 100644 index a9b4d0d70..000000000 --- a/test/maverick/consensus/replay_file.go +++ /dev/null @@ -1,339 +0,0 @@ -package consensus - -import ( - "bufio" - "context" - "errors" - "fmt" - "io" - "os" - "strconv" - "strings" - - dbm "github.com/tendermint/tm-db" - - cfg "github.com/line/ostracon/config" - tmcon "github.com/line/ostracon/consensus" - "github.com/line/ostracon/libs/log" - tmos "github.com/line/ostracon/libs/os" - "github.com/line/ostracon/proxy" - sm "github.com/line/ostracon/state" - "github.com/line/ostracon/store" - "github.com/line/ostracon/types" -) - -const ( - // event bus subscriber - subscriber = "replay-file" -) - -//-------------------------------------------------------- -// replay messages interactively or all at once - -// replay the wal file -func RunReplayFile(config cfg.BaseConfig, csConfig *cfg.ConsensusConfig, console bool) { - consensusState := newConsensusStateForReplay(config, csConfig) - - if err := consensusState.ReplayFile(csConfig.WalFile(), console); err != nil { - tmos.Exit(fmt.Sprintf("Error during consensus replay: %v", err)) - } -} - -// Replay msgs in file or start the console -func (cs *State) ReplayFile(file string, console bool) error { - - if cs.IsRunning() { - return errors.New("cs is already running, cannot replay") - } - if cs.wal != nil { - return errors.New("cs wal is open, cannot replay") - } - - cs.startForReplay() - - // ensure all new step events are regenerated as expected - - ctx := context.Background() - newStepSub, err := cs.eventBus.Subscribe(ctx, subscriber, types.EventQueryNewRoundStep) - if err != nil { - return fmt.Errorf("failed to subscribe %s to %v", subscriber, types.EventQueryNewRoundStep) - } - defer func() { - if err := cs.eventBus.Unsubscribe(ctx, subscriber, types.EventQueryNewRoundStep); err != nil { - cs.Logger.Error("Error unsubscribing to event bus", "err", err) - } - }() - - // just open the file for reading, no need to use wal - fp, err := os.OpenFile(file, os.O_RDONLY, 0600) - if err != nil { - return err - } - - pb := newPlayback(file, fp, cs, cs.state.Copy()) - defer pb.fp.Close() - - var nextN int // apply N msgs in a row - var msg *tmcon.TimedWALMessage - for { - if nextN == 0 && console { - nextN = pb.replayConsoleLoop() - } - - msg, err = pb.dec.Decode() - if err == io.EOF { - return nil - } else if err != nil { - return err - } - - if err := pb.cs.readReplayMessage(msg, newStepSub); err != nil { - return err - } - - if nextN > 0 { - nextN-- - } - pb.count++ - } -} - -//------------------------------------------------ -// playback manager - -type playback struct { - cs *State - - fp *os.File - dec *WALDecoder - count int // how many lines/msgs into the file are we - - // replays can be reset to beginning - fileName string // so we can close/reopen the file - genesisState sm.State // so the replay session knows where to restart from -} - -func newPlayback(fileName string, fp *os.File, cs *State, genState sm.State) *playback { - return &playback{ - cs: cs, - fp: fp, - fileName: fileName, - genesisState: genState, - dec: NewWALDecoder(fp), - } -} - -// go back count steps by resetting the state and running (pb.count - count) steps -func (pb *playback) replayReset(count int, newStepSub types.Subscription) error { - if err := pb.cs.Stop(); err != nil { - return err - } - pb.cs.Wait() - - newCS := NewState(pb.cs.config, pb.genesisState.Copy(), pb.cs.blockExec, - pb.cs.blockStore, pb.cs.txNotifier, pb.cs.evpool, map[int64]Misbehavior{}) - newCS.SetEventBus(pb.cs.eventBus) - newCS.startForReplay() - - if err := pb.fp.Close(); err != nil { - return err - } - fp, err := os.OpenFile(pb.fileName, os.O_RDONLY, 0600) - if err != nil { - return err - } - pb.fp = fp - pb.dec = NewWALDecoder(fp) - count = pb.count - count - fmt.Printf("Reseting from %d to %d\n", pb.count, count) - pb.count = 0 - pb.cs = newCS - var msg *tmcon.TimedWALMessage - for i := 0; i < count; i++ { - msg, err = pb.dec.Decode() - if err == io.EOF { - return nil - } else if err != nil { - return err - } - if err := pb.cs.readReplayMessage(msg, newStepSub); err != nil { - return err - } - pb.count++ - } - return nil -} - -func (cs *State) startForReplay() { - cs.Logger.Error("Replay commands are disabled until someone updates them and writes tests") - /* TODO:! - // since we replay tocks we just ignore ticks - go func() { - for { - select { - case <-cs.tickChan: - case <-cs.Quit: - return - } - } - }()*/ -} - -// console function for parsing input and running commands -func (pb *playback) replayConsoleLoop() int { - for { - fmt.Printf("> ") - bufReader := bufio.NewReader(os.Stdin) - line, more, err := bufReader.ReadLine() - if more { - tmos.Exit("input is too long") - } else if err != nil { - tmos.Exit(err.Error()) - } - - tokens := strings.Split(string(line), " ") - if len(tokens) == 0 { - continue - } - - switch tokens[0] { - case "next": - // "next" -> replay next message - // "next N" -> replay next N messages - - if len(tokens) == 1 { - return 0 - } - i, err := strconv.Atoi(tokens[1]) - if err != nil { - fmt.Println("next takes an integer argument") - } else { - return i - } - - case "back": - // "back" -> go back one message - // "back N" -> go back N messages - - // NOTE: "back" is not supported in the state machine design, - // so we restart and replay up to - - ctx := context.Background() - // ensure all new step events are regenerated as expected - - newStepSub, err := pb.cs.eventBus.Subscribe(ctx, subscriber, types.EventQueryNewRoundStep) - if err != nil { - tmos.Exit(fmt.Sprintf("failed to subscribe %s to %v", subscriber, types.EventQueryNewRoundStep)) - } - defer func() { - if err := pb.cs.eventBus.Unsubscribe(ctx, subscriber, types.EventQueryNewRoundStep); err != nil { - pb.cs.Logger.Error("Error unsubscribing from eventBus", "err", err) - } - }() - - if len(tokens) == 1 { - if err := pb.replayReset(1, newStepSub); err != nil { - pb.cs.Logger.Error("Replay reset error", "err", err) - } - } else { - i, err := strconv.Atoi(tokens[1]) - if err != nil { - fmt.Println("back takes an integer argument") - } else if i > pb.count { - fmt.Printf("argument to back must not be larger than the current count (%d)\n", pb.count) - } else if err := pb.replayReset(i, newStepSub); err != nil { - pb.cs.Logger.Error("Replay reset error", "err", err) - } - } - - case "rs": - // "rs" -> print entire round state - // "rs short" -> print height/round/step - // "rs " -> print another field of the round state - - rs := pb.cs.RoundState - if len(tokens) == 1 { - fmt.Println(rs) - } else { - switch tokens[1] { - case "short": - fmt.Printf("%v/%v/%v\n", rs.Height, rs.Round, rs.Step) - case "voters": - fmt.Println(rs.Voters) - case "proposal": - fmt.Println(rs.Proposal) - case "proposal_block": - fmt.Printf("%v %v\n", rs.ProposalBlockParts.StringShort(), rs.ProposalBlock.StringShort()) - case "locked_round": - fmt.Println(rs.LockedRound) - case "locked_block": - fmt.Printf("%v %v\n", rs.LockedBlockParts.StringShort(), rs.LockedBlock.StringShort()) - case "votes": - fmt.Println(rs.Votes.StringIndented(" ")) - - default: - fmt.Println("Unknown option", tokens[1]) - } - } - case "n": - fmt.Println(pb.count) - } - } -} - -//-------------------------------------------------------------------------------- - -// convenience for replay mode -func newConsensusStateForReplay(config cfg.BaseConfig, csConfig *cfg.ConsensusConfig) *State { - dbType := dbm.BackendType(config.DBBackend) - // Get BlockStore - blockStoreDB, err := dbm.NewDB("blockstore", dbType, config.DBDir()) - if err != nil { - tmos.Exit(err.Error()) - } - blockStore := store.NewBlockStore(blockStoreDB) - - // Get State - stateDB, err := dbm.NewDB("state", dbType, config.DBDir()) - if err != nil { - tmos.Exit(err.Error()) - } - stateStore := sm.NewStore(stateDB) - gdoc, err := sm.MakeGenesisDocFromFile(config.GenesisFile()) - if err != nil { - tmos.Exit(err.Error()) - } - state, err := sm.MakeGenesisState(gdoc) - if err != nil { - tmos.Exit(err.Error()) - } - - // Create proxyAppConn connection (consensus, mempool, query) - clientCreator := proxy.DefaultClientCreator(config.ProxyApp, config.ABCI, config.DBDir()) - proxyApp := proxy.NewAppConns(clientCreator) - err = proxyApp.Start() - if err != nil { - tmos.Exit(fmt.Sprintf("Error starting proxy app conns: %v", err)) - } - - eventBus := types.NewEventBus() - if err := eventBus.Start(); err != nil { - tmos.Exit(fmt.Sprintf("Failed to start event bus: %v", err)) - } - - handshaker := NewHandshaker(stateStore, state, blockStore, gdoc) - handshaker.SetEventBus(eventBus) - err = handshaker.Handshake(proxyApp) - if err != nil { - tmos.Exit(fmt.Sprintf("Error on handshake: %v", err)) - } - - mempool, evpool := emptyMempool{}, sm.EmptyEvidencePool{} - blockExec := sm.NewBlockExecutor(stateStore, log.TestingLogger(), proxyApp.Consensus(), mempool, evpool) - - consensusState := NewState(csConfig, state.Copy(), blockExec, - blockStore, mempool, evpool, map[int64]Misbehavior{}) - - consensusState.SetEventBus(eventBus) - return consensusState -} diff --git a/test/maverick/consensus/replay_stubs.go b/test/maverick/consensus/replay_stubs.go deleted file mode 100644 index 50f700fec..000000000 --- a/test/maverick/consensus/replay_stubs.go +++ /dev/null @@ -1,92 +0,0 @@ -package consensus - -import ( - abci "github.com/line/ostracon/abci/types" - "github.com/line/ostracon/libs/clist" - mempl "github.com/line/ostracon/mempool" - tmstate "github.com/line/ostracon/proto/ostracon/state" - "github.com/line/ostracon/proxy" - "github.com/line/ostracon/types" -) - -//----------------------------------------------------------------------------- - -type emptyMempool struct{} - -var _ mempl.Mempool = emptyMempool{} - -func (emptyMempool) Lock() {} -func (emptyMempool) Unlock() {} -func (emptyMempool) Size() int { return 0 } -func (emptyMempool) CheckTxSync(_ types.Tx, _ mempl.TxInfo) (*abci.Response, error) { - return nil, nil -} -func (emptyMempool) CheckTxAsync(_ types.Tx, _ mempl.TxInfo, _ func(error), _ func(*abci.Response)) { -} -func (emptyMempool) ReapMaxBytesMaxGas(_, _ int64) types.Txs { return types.Txs{} } -func (emptyMempool) ReapMaxBytesMaxGasMaxTxs(_, _, _ int64) types.Txs { return types.Txs{} } -func (emptyMempool) ReapMaxTxs(n int) types.Txs { return types.Txs{} } -func (emptyMempool) Update( - _ *types.Block, - _ []*abci.ResponseDeliverTx, - _ mempl.PreCheckFunc, - _ mempl.PostCheckFunc, -) error { - return nil -} -func (emptyMempool) Flush() {} -func (emptyMempool) FlushAppConn() error { return nil } -func (emptyMempool) TxsAvailable() <-chan struct{} { return make(chan struct{}) } -func (emptyMempool) EnableTxsAvailable() {} -func (emptyMempool) TxsBytes() int64 { return 0 } - -func (emptyMempool) TxsFront() *clist.CElement { return nil } -func (emptyMempool) TxsWaitChan() <-chan struct{} { return nil } - -func (emptyMempool) InitWAL() error { return nil } -func (emptyMempool) CloseWAL() {} - -//----------------------------------------------------------------------------- -// mockProxyApp uses ABCIResponses to give the right results. -// -// Useful because we don't want to call Commit() twice for the same block on -// the real app. - -func newMockProxyApp(appHash []byte, abciResponses *tmstate.ABCIResponses) proxy.AppConnConsensus { - clientCreator := proxy.NewLocalClientCreator(&mockProxyApp{ - appHash: appHash, - abciResponses: abciResponses, - }) - cli, _ := clientCreator.NewABCIClient() - err := cli.Start() - if err != nil { - panic(err) - } - return proxy.NewAppConnConsensus(cli) -} - -type mockProxyApp struct { - abci.BaseApplication - - appHash []byte - txCount int - abciResponses *tmstate.ABCIResponses -} - -func (mock *mockProxyApp) DeliverTx(req abci.RequestDeliverTx) abci.ResponseDeliverTx { - r := mock.abciResponses.DeliverTxs[mock.txCount] - mock.txCount++ - if r == nil { - return abci.ResponseDeliverTx{} - } - return *r -} - -func (mock *mockProxyApp) EndBlock(req abci.RequestEndBlock) abci.ResponseEndBlock { - mock.txCount = 0 - return *mock.abciResponses.EndBlock -} - -func (mock *mockProxyApp) Commit() abci.ResponseCommit { - return abci.ResponseCommit{Data: mock.appHash} -} diff --git a/test/maverick/consensus/state.go b/test/maverick/consensus/state.go deleted file mode 100644 index 611c02285..000000000 --- a/test/maverick/consensus/state.go +++ /dev/null @@ -1,1996 +0,0 @@ -package consensus - -import ( - "bytes" - "errors" - "fmt" - "io/ioutil" - "os" - "reflect" - "runtime/debug" - "sync" - "time" - - "github.com/gogo/protobuf/proto" - - cfg "github.com/line/ostracon/config" - tmcon "github.com/line/ostracon/consensus" - cstypes "github.com/line/ostracon/consensus/types" - "github.com/line/ostracon/crypto" - tmevents "github.com/line/ostracon/libs/events" - "github.com/line/ostracon/libs/fail" - tmjson "github.com/line/ostracon/libs/json" - "github.com/line/ostracon/libs/log" - tmmath "github.com/line/ostracon/libs/math" - tmos "github.com/line/ostracon/libs/os" - "github.com/line/ostracon/libs/service" - "github.com/line/ostracon/p2p" - tmproto "github.com/line/ostracon/proto/ostracon/types" - sm "github.com/line/ostracon/state" - "github.com/line/ostracon/types" - tmtime "github.com/line/ostracon/types/time" -) - -// State handles execution of the consensus algorithm. -// It processes votes and proposals, and upon reaching agreement, -// commits blocks to the chain and executes them against the application. -// The internal state machine receives input from peers, the internal validator, and from a timer. -type State struct { - service.BaseService - - // config details - config *cfg.ConsensusConfig - privValidator types.PrivValidator // for signing votes - - // store blocks and commits - blockStore sm.BlockStore - - // create and execute blocks - blockExec *sm.BlockExecutor - - // notify us if txs are available - txNotifier txNotifier - - // add evidence to the pool - // when it's detected - evpool evidencePool - - // internal state - mtx sync.RWMutex - cstypes.RoundState - state sm.State // State until height-1. - - // state changes may be triggered by: msgs from peers, - // msgs from ourself, or by timeouts - peerMsgQueue chan msgInfo - internalMsgQueue chan msgInfo - timeoutTicker TimeoutTicker - // privValidator pubkey, memoized for the duration of one block - // to avoid extra requests to HSM - privValidatorPubKey crypto.PubKey - - // information about about added votes and block parts are written on this channel - // so statistics can be computed by reactor - statsMsgQueue chan msgInfo - - // we use eventBus to trigger msg broadcasts in the reactor, - // and to notify external subscribers, eg. through a websocket - eventBus *types.EventBus - - // a Write-Ahead Log ensures we can recover from any kind of crash - // and helps us avoid signing conflicting votes - wal tmcon.WAL - replayMode bool // so we don't log signing errors during replay - doWALCatchup bool // determines if we even try to do the catchup - - // for tests where we want to limit the number of transitions the state makes - nSteps int - - // some functions can be overwritten for testing - decideProposal func(height int64, round int32) - - // closed when we finish shutting down - done chan struct{} - - // synchronous pubsub between consensus state and reactor. - // state only emits EventNewRoundStep and EventVote - evsw tmevents.EventSwitch - - // for reporting metrics - metrics *tmcon.Metrics - - // misbehaviors mapped for each height (can't have more than one misbehavior per height) - misbehaviors map[int64]Misbehavior - - // the switch is passed to the state so that maveick misbehaviors can directly control which - // information they send to which nodes - sw *p2p.Switch -} - -// StateOption sets an optional parameter on the State. -type StateOption func(*State) - -// NewState returns a new State. -func NewState( - config *cfg.ConsensusConfig, - state sm.State, - blockExec *sm.BlockExecutor, - blockStore sm.BlockStore, - txNotifier txNotifier, - evpool evidencePool, - misbehaviors map[int64]Misbehavior, - options ...StateOption, -) *State { - cs := &State{ - config: config, - blockExec: blockExec, - blockStore: blockStore, - txNotifier: txNotifier, - peerMsgQueue: make(chan msgInfo, msgQueueSize), - internalMsgQueue: make(chan msgInfo, msgQueueSize), - timeoutTicker: NewTimeoutTicker(), - statsMsgQueue: make(chan msgInfo, msgQueueSize), - done: make(chan struct{}), - doWALCatchup: true, - wal: nilWAL{}, - evpool: evpool, - evsw: tmevents.NewEventSwitch(), - metrics: tmcon.NopMetrics(), - misbehaviors: misbehaviors, - } - // set function defaults (may be overwritten before calling Start) - cs.decideProposal = cs.defaultDecideProposal - - // We have no votes, so reconstruct LastCommit from SeenCommit. - if state.LastBlockHeight > 0 { - cs.reconstructLastCommit(state) - } - - cs.updateToState(state) - - // Don't call scheduleRound0 yet. - // We do that upon Start(). - - cs.BaseService = *service.NewBaseService(nil, "State", cs) - for _, option := range options { - option(cs) - } - return cs -} - -// I know this is not great but the maverick consensus state needs access to the peers -func (cs *State) SetSwitch(sw *p2p.Switch) { - cs.sw = sw -} - -// state transitions on complete-proposal, 2/3-any, 2/3-one -func (cs *State) handleMsg(mi msgInfo) { - cs.mtx.Lock() - defer cs.mtx.Unlock() - - var ( - added bool - err error - ) - msg, peerID := mi.Msg, mi.PeerID - switch msg := msg.(type) { - case *tmcon.ProposalMessage: - // will not cause transition. - // once proposal is set, we can receive block parts - // err = cs.setProposal(msg.Proposal) - if b, ok := cs.misbehaviors[cs.Height]; ok { - err = b.ReceiveProposal(cs, msg.Proposal) - } else { - err = defaultReceiveProposal(cs, msg.Proposal) - } - case *tmcon.BlockPartMessage: - // if the proposal is complete, we'll enterPrevote or tryFinalizeCommit - added, err = cs.addProposalBlockPart(msg, peerID) - if added { - cs.statsMsgQueue <- mi - } - - if err != nil && msg.Round != cs.Round { - cs.Logger.Debug( - "Received block part from wrong round", - "height", - cs.Height, - "csRound", - cs.Round, - "blockRound", - msg.Round) - err = nil - } - case *tmcon.VoteMessage: - // attempt to add the vote and dupeout the validator if its a duplicate signature - // if the vote gives us a 2/3-any or 2/3-one, we transition - added, err = cs.tryAddVote(msg.Vote, peerID) - if added { - cs.statsMsgQueue <- mi - } - - // if err == ErrAddingVote { - // TODO: punish peer - // We probably don't want to stop the peer here. The vote does not - // necessarily comes from a malicious peer but can be just broadcasted by - // a typical peer. - // https://github.com/tendermint/tendermint/issues/1281 - // } - - // NOTE: the vote is broadcast to peers by the reactor listening - // for vote events - - // TODO: If rs.Height == vote.Height && rs.Round < vote.Round, - // the peer is sending us CatchupCommit precommits. - // We could make note of this and help filter in broadcastHasVoteMessage(). - default: - cs.Logger.Error("Unknown msg type", "type", reflect.TypeOf(msg)) - return - } - - if err != nil { - cs.Logger.Error("Error with msg", "height", cs.Height, "round", cs.Round, - "peer", peerID, "err", err, "msg", msg) - } -} - -// Enter (CreateEmptyBlocks): from enterNewRound(height,round) -// Enter (CreateEmptyBlocks, CreateEmptyBlocksInterval > 0 ): -// after enterNewRound(height,round), after timeout of CreateEmptyBlocksInterval -// Enter (!CreateEmptyBlocks) : after enterNewRound(height,round), once txs are in the mempool -func (cs *State) enterPropose(height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPropose <= cs.Step) { - logger.Debug(fmt.Sprintf( - "enterPropose(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - round, - cs.Height, - cs.Round, - cs.Step)) - return - } - logger.Info(fmt.Sprintf("enterPropose(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - defer func() { - // Done enterPropose: - cs.updateRoundStep(round, cstypes.RoundStepPropose) - cs.newStep() - - // If we have the whole proposal + POL, then goto Prevote now. - // else, we'll enterPrevote when the rest of the proposal is received (in AddProposalBlockPart), - // or else after timeoutPropose - if cs.isProposalComplete() { - cs.enterPrevote(height, cs.Round) - } - }() - - if b, ok := cs.misbehaviors[cs.Height]; ok { - b.EnterPropose(cs, height, round) - } else { - defaultEnterPropose(cs, height, round) - } -} - -// Enter: `timeoutPropose` after entering Propose. -// Enter: proposal block and POL is ready. -// Prevote for LockedBlock if we're locked, or ProposalBlock if valid. -// Otherwise vote nil. -func (cs *State) enterPrevote(height int64, round int32) { - if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevote <= cs.Step) { - cs.Logger.Debug(fmt.Sprintf( - "enterPrevote(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - round, - cs.Height, - cs.Round, - cs.Step)) - return - } - - defer func() { - // Done enterPrevote: - cs.updateRoundStep(round, cstypes.RoundStepPrevote) - cs.newStep() - }() - - cs.Logger.Debug(fmt.Sprintf("enterPrevote(%v/%v); current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - // Sign and broadcast vote as necessary - if b, ok := cs.misbehaviors[cs.Height]; ok { - b.EnterPrevote(cs, height, round) - } else { - defaultEnterPrevote(cs, height, round) - } - - // Once `addVote` hits any +2/3 prevotes, we will go to PrevoteWait - // (so we have more time to try and collect +2/3 prevotes for a single block) -} - -// Enter: `timeoutPrevote` after any +2/3 prevotes. -// Enter: `timeoutPrecommit` after any +2/3 precommits. -// Enter: +2/3 precomits for block or nil. -// Lock & precommit the ProposalBlock if we have enough prevotes for it (a POL in this round) -// else, unlock an existing lock and precommit nil if +2/3 of prevotes were nil, -// else, precommit nil otherwise. -func (cs *State) enterPrecommit(height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrecommit <= cs.Step) { - logger.Debug(fmt.Sprintf( - "enterPrecommit(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - round, - cs.Height, - cs.Round, - cs.Step)) - return - } - - logger.Info(fmt.Sprintf("enterPrecommit(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - defer func() { - // Done enterPrecommit: - cs.updateRoundStep(round, cstypes.RoundStepPrecommit) - cs.newStep() - }() - - if b, ok := cs.misbehaviors[cs.Height]; ok { - b.EnterPrecommit(cs, height, round) - } else { - defaultEnterPrecommit(cs, height, round) - } - -} - -func (cs *State) addVote( - vote *types.Vote, - peerID p2p.ID) (added bool, err error) { - cs.Logger.Debug( - "addVote", - "voteHeight", - vote.Height, - "voteType", - vote.Type, - "valIndex", - vote.ValidatorIndex, - "csHeight", - cs.Height, - ) - - // A precommit for the previous height? - // These come in while we wait timeoutCommit - if vote.Height+1 == cs.Height && vote.Type == tmproto.PrecommitType { - if cs.Step != cstypes.RoundStepNewHeight { - // Late precommit at prior height is ignored - cs.Logger.Debug("Precommit vote came in after commit timeout and has been ignored", "vote", vote) - return - } - added, err = cs.LastCommit.AddVote(vote) - if !added { - return - } - - cs.Logger.Info(fmt.Sprintf("Added to lastPrecommits: %v", cs.LastCommit.StringShort())) - _ = cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}) - cs.evsw.FireEvent(types.EventVote, vote) - - // if we can skip timeoutCommit and have all the votes now, - if cs.config.SkipTimeoutCommit && cs.LastCommit.HasAll() { - // go straight to new round (skip timeout commit) - // cs.scheduleTimeout(time.Duration(0), cs.Height, 0, cstypes.RoundStepNewHeight) - cs.enterNewRound(cs.Height, 0) - } - - return - } - - // Height mismatch is ignored. - // Not necessarily a bad peer, but not favourable behaviour. - if vote.Height != cs.Height { - cs.Logger.Debug("vote ignored and not added", "voteHeight", vote.Height, "csHeight", cs.Height, "peerID", peerID) - return - } - - added, err = cs.Votes.AddVote(vote, peerID) - if !added { - // Either duplicate, or error upon cs.Votes.AddByIndex() - return - } - - _ = cs.eventBus.PublishEventVote(types.EventDataVote{Vote: vote}) - cs.evsw.FireEvent(types.EventVote, vote) - - switch vote.Type { - case tmproto.PrevoteType: - if b, ok := cs.misbehaviors[cs.Height]; ok { - b.ReceivePrevote(cs, vote) - } else { - defaultReceivePrevote(cs, vote) - } - - case tmproto.PrecommitType: - if b, ok := cs.misbehaviors[cs.Height]; ok { - b.ReceivePrecommit(cs, vote) - } - defaultReceivePrecommit(cs, vote) - - default: - panic(fmt.Sprintf("Unexpected vote type %v", vote.Type)) - } - - return added, err -} - -//----------------------------------------------------------------------------- -// Errors - -var ( - ErrInvalidProposalSignature = errors.New("error invalid proposal signature") - ErrInvalidProposalPOLRound = errors.New("error invalid proposal POL round") - ErrAddingVote = errors.New("error adding vote") - ErrSignatureFoundInPastBlocks = errors.New("found signature from the same key") - - errPubKeyIsNotSet = errors.New("pubkey is not set. Look for \"Can't get private validator pubkey\" errors") -) - -//----------------------------------------------------------------------------- - -var ( - msgQueueSize = 1000 -) - -// msgs from the reactor which may update the state -type msgInfo struct { - Msg tmcon.Message `json:"msg"` - PeerID p2p.ID `json:"peer_key"` -} - -// internally generated messages which may update the state -type timeoutInfo struct { - Duration time.Duration `json:"duration"` - Height int64 `json:"height"` - Round int32 `json:"round"` - Step cstypes.RoundStepType `json:"step"` -} - -func (ti *timeoutInfo) String() string { - return fmt.Sprintf("%v ; %d/%d %v", ti.Duration, ti.Height, ti.Round, ti.Step) -} - -// interface to the mempool -type txNotifier interface { - TxsAvailable() <-chan struct{} -} - -// interface to the evidence pool -type evidencePool interface { - // reports conflicting votes to the evidence pool to be processed into evidence - ReportConflictingVotes(voteA, voteB *types.Vote) -} - -//---------------------------------------- -// Public interface - -// SetLogger implements Service. -func (cs *State) SetLogger(l log.Logger) { - cs.BaseService.Logger = l - cs.timeoutTicker.SetLogger(l) -} - -// SetEventBus sets event bus. -func (cs *State) SetEventBus(b *types.EventBus) { - cs.eventBus = b - cs.blockExec.SetEventBus(b) -} - -// StateMetrics sets the metrics. -func StateMetrics(metrics *tmcon.Metrics) StateOption { - return func(cs *State) { cs.metrics = metrics } -} - -// String returns a string. -func (cs *State) String() string { - // better not to access shared variables - return "ConsensusState" -} - -// GetState returns a copy of the chain state. -func (cs *State) GetState() sm.State { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - return cs.state.Copy() -} - -// GetLastHeight returns the last height committed. -// If there were no blocks, returns 0. -func (cs *State) GetLastHeight() int64 { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - return cs.RoundState.Height - 1 -} - -// GetRoundState returns a shallow copy of the internal consensus state. -func (cs *State) GetRoundState() *cstypes.RoundState { - cs.mtx.RLock() - rs := cs.RoundState // copy - cs.mtx.RUnlock() - return &rs -} - -// GetRoundStateJSON returns a json of RoundState. -func (cs *State) GetRoundStateJSON() ([]byte, error) { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - return tmjson.Marshal(cs.RoundState) -} - -// GetRoundStateSimpleJSON returns a json of RoundStateSimple -func (cs *State) GetRoundStateSimpleJSON() ([]byte, error) { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - return tmjson.Marshal(cs.RoundState.RoundStateSimple()) -} - -// GetValidators returns a copy of the current validators. -// ValidatorOrVoter: validator -func (cs *State) GetValidators() (int64, []*types.Validator) { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators -} - -// SetPrivValidator sets the private validator account for signing votes. It -// immediately requests pubkey and caches it. -func (cs *State) SetPrivValidator(priv types.PrivValidator) { - cs.mtx.Lock() - defer cs.mtx.Unlock() - - cs.privValidator = priv - - if err := cs.updatePrivValidatorPubKey(); err != nil { - cs.Logger.Error("Can't get private validator pubkey", "err", err) - } -} - -// SetTimeoutTicker sets the local timer. It may be useful to overwrite for testing. -func (cs *State) SetTimeoutTicker(timeoutTicker TimeoutTicker) { - cs.mtx.Lock() - cs.timeoutTicker = timeoutTicker - cs.mtx.Unlock() -} - -// LoadCommit loads the commit for a given height. -func (cs *State) LoadCommit(height int64) *types.Commit { - cs.mtx.RLock() - defer cs.mtx.RUnlock() - if height == cs.blockStore.Height() { - return cs.blockStore.LoadSeenCommit(height) - } - return cs.blockStore.LoadBlockCommit(height) -} - -// OnStart loads the latest state via the WAL, and starts the timeout and -// receive routines. -func (cs *State) OnStart() error { - // We may set the WAL in testing before calling Start, so only OpenWAL if its - // still the nilWAL. - if _, ok := cs.wal.(nilWAL); ok { - if err := cs.loadWalFile(); err != nil { - return err - } - } - - // We may have lost some votes if the process crashed reload from consensus - // log to catchup. - if cs.doWALCatchup { - repairAttempted := false - LOOP: - for { - err := cs.catchupReplay(cs.Height) - switch { - case err == nil: - break LOOP - case !IsDataCorruptionError(err): - cs.Logger.Error("Error on catchup replay. Proceeding to start State anyway", "err", err) - break LOOP - case repairAttempted: - return err - } - - cs.Logger.Info("WAL file is corrupted. Attempting repair", "err", err) - - // 1) prep work - if err := cs.wal.Stop(); err != nil { - return err - } - repairAttempted = true - - // 2) backup original WAL file - corruptedFile := fmt.Sprintf("%s.CORRUPTED", cs.config.WalFile()) - if err := tmos.CopyFile(cs.config.WalFile(), corruptedFile); err != nil { - return err - } - cs.Logger.Info("Backed up WAL file", "src", cs.config.WalFile(), "dst", corruptedFile) - - // 3) try to repair (WAL file will be overwritten!) - if err := repairWalFile(corruptedFile, cs.config.WalFile()); err != nil { - cs.Logger.Error("Repair failed", "err", err) - return err - } - cs.Logger.Info("Successful repair") - - // reload WAL file - if err := cs.loadWalFile(); err != nil { - return err - } - } - } - - if err := cs.evsw.Start(); err != nil { - return err - } - - // we need the timeoutRoutine for replay so - // we don't block on the tick chan. - // NOTE: we will get a build up of garbage go routines - // firing on the tockChan until the receiveRoutine is started - // to deal with them (by that point, at most one will be valid) - if err := cs.timeoutTicker.Start(); err != nil { - return err - } - - // Double Signing Risk Reduction - if err := cs.checkDoubleSigningRisk(cs.Height); err != nil { - return err - } - - // now start the receiveRoutine - go cs.receiveRoutine(0) - - // schedule the first round! - // use GetRoundState so we don't race the receiveRoutine for access - cs.scheduleRound0(cs.GetRoundState()) - - return nil -} - -// loadWalFile loads WAL data from file. It overwrites cs.wal. -func (cs *State) loadWalFile() error { - wal, err := cs.OpenWAL(cs.config.WalFile()) - if err != nil { - cs.Logger.Error("Error loading State wal", "err", err) - return err - } - cs.wal = wal - return nil -} - -// OnStop implements service.Service. -func (cs *State) OnStop() { - if err := cs.evsw.Stop(); err != nil { - cs.Logger.Error("error trying to stop eventSwitch", "error", err) - } - if err := cs.timeoutTicker.Stop(); err != nil { - cs.Logger.Error("error trying to stop timeoutTicket", "error", err) - } - // WAL is stopped in receiveRoutine. -} - -// Wait waits for the the main routine to return. -// NOTE: be sure to Stop() the event switch and drain -// any event channels or this may deadlock -func (cs *State) Wait() { - <-cs.done -} - -// OpenWAL opens a file to log all consensus messages and timeouts for -// deterministic accountability. -func (cs *State) OpenWAL(walFile string) (tmcon.WAL, error) { - wal, err := NewWAL(walFile) - if err != nil { - cs.Logger.Error("Failed to open WAL", "file", walFile, "err", err) - return nil, err - } - wal.SetLogger(cs.Logger.With("wal", walFile)) - if err := wal.Start(); err != nil { - cs.Logger.Error("Failed to start WAL", "err", err) - return nil, err - } - return wal, nil -} - -//------------------------------------------------------------ -// Public interface for passing messages into the consensus state, possibly causing a state transition. -// If peerID == "", the msg is considered internal. -// Messages are added to the appropriate queue (peer or internal). -// If the queue is full, the function may block. -// TODO: should these return anything or let callers just use events? - -// AddVote inputs a vote. -func (cs *State) AddVote(vote *types.Vote, peerID p2p.ID) (added bool, err error) { - if peerID == "" { - cs.internalMsgQueue <- msgInfo{&tmcon.VoteMessage{Vote: vote}, ""} - } else { - cs.peerMsgQueue <- msgInfo{&tmcon.VoteMessage{Vote: vote}, peerID} - } - - // TODO: wait for event?! - return false, nil -} - -// SetProposal inputs a proposal. -func (cs *State) SetProposal(proposal *types.Proposal, peerID p2p.ID) error { - - if peerID == "" { - cs.internalMsgQueue <- msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, ""} - } else { - cs.peerMsgQueue <- msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, peerID} - } - - // TODO: wait for event?! - return nil -} - -// AddProposalBlockPart inputs a part of the proposal block. -func (cs *State) AddProposalBlockPart(height int64, round int32, part *types.Part, peerID p2p.ID) error { - - if peerID == "" { - cs.internalMsgQueue <- msgInfo{&tmcon.BlockPartMessage{Height: height, Round: round, Part: part}, ""} - } else { - cs.peerMsgQueue <- msgInfo{&tmcon.BlockPartMessage{Height: height, Round: round, Part: part}, peerID} - } - - // TODO: wait for event?! - return nil -} - -// SetProposalAndBlock inputs the proposal and all block parts. -func (cs *State) SetProposalAndBlock( - proposal *types.Proposal, - block *types.Block, - parts *types.PartSet, - peerID p2p.ID, -) error { - if err := cs.SetProposal(proposal, peerID); err != nil { - return err - } - for i := 0; i < int(parts.Total()); i++ { - part := parts.GetPart(i) - if err := cs.AddProposalBlockPart(proposal.Height, proposal.Round, part, peerID); err != nil { - return err - } - } - return nil -} - -//------------------------------------------------------------ -// internal functions for managing the state - -func (cs *State) updateHeight(height int64) { - cs.metrics.Height.Set(float64(height)) - cs.Height = height -} - -func (cs *State) updateRoundStep(round int32, step cstypes.RoundStepType) { - cs.Round = round - cs.Step = step -} - -// enterNewRound(height, 0) at cs.StartTime. -func (cs *State) scheduleRound0(rs *cstypes.RoundState) { - // cs.Logger.Info("scheduleRound0", "now", tmtime.Now(), "startTime", cs.StartTime) - sleepDuration := rs.StartTime.Sub(tmtime.Now()) - cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight) -} - -// Attempt to schedule a timeout (by sending timeoutInfo on the tickChan) -func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int32, step cstypes.RoundStepType) { - cs.timeoutTicker.ScheduleTimeout(timeoutInfo{duration, height, round, step}) -} - -// send a msg into the receiveRoutine regarding our own proposal, block part, or vote -func (cs *State) sendInternalMessage(mi msgInfo) { - select { - case cs.internalMsgQueue <- mi: - default: - // NOTE: using the go-routine means our votes can - // be processed out of order. - // TODO: use CList here for strict determinism and - // attempt push to internalMsgQueue in receiveRoutine - cs.Logger.Info("Internal msg queue is full. Using a go-routine") - go func() { cs.internalMsgQueue <- mi }() - } -} - -// Reconstruct LastCommit from SeenCommit, which we saved along with the block, -// (which happens even before saving the state) -func (cs *State) reconstructLastCommit(state sm.State) { - seenCommit := cs.blockStore.LoadSeenCommit(state.LastBlockHeight) - if seenCommit == nil { - panic(fmt.Sprintf("Failed to reconstruct LastCommit: seen commit for height %v not found", - state.LastBlockHeight)) - } - - lastPrecommits := types.CommitToVoteSet(state.ChainID, seenCommit, state.LastVoters) - if !lastPrecommits.HasTwoThirdsMajority() { - panic("Failed to reconstruct LastCommit: Does not have +2/3 maj") - } - - cs.LastCommit = lastPrecommits -} - -// Updates State and increments height to match that of state. -// The round becomes 0 and cs.Step becomes cstypes.RoundStepNewHeight. -func (cs *State) updateToState(state sm.State) { - if cs.CommitRound > -1 && 0 < cs.Height && cs.Height != state.LastBlockHeight { - panic(fmt.Sprintf("updateToState() expected state height of %v but found %v", - cs.Height, state.LastBlockHeight)) - } - if !cs.state.IsEmpty() { - if cs.state.LastBlockHeight > 0 && cs.state.LastBlockHeight+1 != cs.Height { - // This might happen when someone else is mutating cs.state. - // Someone forgot to pass in state.Copy() somewhere?! - panic(fmt.Sprintf("Inconsistent cs.state.LastBlockHeight+1 %v vs cs.Height %v", - cs.state.LastBlockHeight+1, cs.Height)) - } - if cs.state.LastBlockHeight > 0 && cs.Height == cs.state.InitialHeight { - panic(fmt.Sprintf("Inconsistent cs.state.LastBlockHeight %v, expected 0 for initial height %v", - cs.state.LastBlockHeight, cs.state.InitialHeight)) - } - - // If state isn't further out than cs.state, just ignore. - // This happens when SwitchToConsensus() is called in the reactor. - // We don't want to reset e.g. the Votes, but we still want to - // signal the new round step, because other services (eg. txNotifier) - // depend on having an up-to-date peer state! - if state.LastBlockHeight <= cs.state.LastBlockHeight { - cs.Logger.Info( - "Ignoring updateToState()", - "newHeight", - state.LastBlockHeight+1, - "oldHeight", - cs.state.LastBlockHeight+1) - cs.newStep() - return - } - } - - // Reset fields based on state. - voters := state.Voters - - switch { - case state.LastBlockHeight == 0: // Very first commit should be empty. - cs.LastCommit = (*types.VoteSet)(nil) - case cs.CommitRound > -1 && cs.Votes != nil: // Otherwise, use cs.Votes - if !cs.Votes.Precommits(cs.CommitRound).HasTwoThirdsMajority() { - panic(fmt.Sprintf("Wanted to form a Commit, but Precommits (H/R: %d/%d) didn't have 2/3+: %v", - state.LastBlockHeight, - cs.CommitRound, - cs.Votes.Precommits(cs.CommitRound))) - } - cs.LastCommit = cs.Votes.Precommits(cs.CommitRound) - case cs.LastCommit == nil: - // NOTE: when Ostracon starts, it has no votes. reconstructLastCommit - // must be called to reconstruct LastCommit from SeenCommit. - panic(fmt.Sprintf("LastCommit cannot be empty after initial block (H:%d)", - state.LastBlockHeight+1, - )) - } - - // Next desired block height - height := state.LastBlockHeight + 1 - if height == 1 { - height = state.InitialHeight - } - - // RoundState fields - cs.updateHeight(height) - cs.updateRoundStep(0, cstypes.RoundStepNewHeight) - if cs.CommitTime.IsZero() { - // "Now" makes it easier to sync up dev nodes. - // We add timeoutCommit to allow transactions - // to be gathered for the first block. - // And alternative solution that relies on clocks: - // cs.StartTime = state.LastBlockTime.Add(timeoutCommit) - cs.StartTime = cs.config.Commit(tmtime.Now()) - } else { - cs.StartTime = cs.config.Commit(cs.CommitTime) - } - - cs.Validators = state.Validators.Copy() - cs.Voters = state.Voters.Copy() - cs.Proposal = nil - cs.ProposalBlock = nil - cs.ProposalBlockParts = nil - cs.LockedRound = -1 - cs.LockedBlock = nil - cs.LockedBlockParts = nil - cs.ValidRound = -1 - cs.ValidBlock = nil - cs.ValidBlockParts = nil - cs.Votes = cstypes.NewHeightVoteSet(state.ChainID, height, voters) - cs.CommitRound = -1 - cs.LastVoters = state.LastVoters - cs.TriggeredTimeoutPrecommit = false - - cs.state = state - - // Finally, broadcast RoundState - cs.newStep() -} - -func (cs *State) newStep() { - rs := cs.RoundStateEvent() - if err := cs.wal.Write(rs); err != nil { - cs.Logger.Error("Error writing to wal", "err", err) - } - cs.nSteps++ - // newStep is called by updateToState in NewState before the eventBus is set! - if cs.eventBus != nil { - if err := cs.eventBus.PublishEventNewRoundStep(rs); err != nil { - cs.Logger.Error("Error publishing new round step", "err", err) - } - cs.evsw.FireEvent(types.EventNewRoundStep, &cs.RoundState) - } -} - -//----------------------------------------- -// the main go routines - -// receiveRoutine handles messages which may cause state transitions. -// it's argument (n) is the number of messages to process before exiting - use 0 to run forever -// It keeps the RoundState and is the only thing that updates it. -// Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities. -// State must be locked before any internal state is updated. -func (cs *State) receiveRoutine(maxSteps int) { - onExit := func(cs *State) { - // NOTE: the internalMsgQueue may have signed messages from our - // priv_val that haven't hit the WAL, but its ok because - // priv_val tracks LastSig - - // close wal now that we're done writing to it - if err := cs.wal.Stop(); err != nil { - cs.Logger.Error("error trying to stop wal", "error", err) - } - cs.wal.Wait() - - close(cs.done) - } - - defer func() { - if r := recover(); r != nil { - cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack())) - // stop gracefully - // - // NOTE: We most probably shouldn't be running any further when there is - // some unexpected panic. Some unknown error happened, and so we don't - // know if that will result in the validator signing an invalid thing. It - // might be worthwhile to explore a mechanism for manual resuming via - // some console or secure RPC system, but for now, halting the chain upon - // unexpected consensus bugs sounds like the better option. - onExit(cs) - } - }() - - for { - if maxSteps > 0 { - if cs.nSteps >= maxSteps { - cs.Logger.Info("reached max steps. exiting receive routine") - cs.nSteps = 0 - return - } - } - rs := cs.RoundState - var mi msgInfo - - select { - case <-cs.txNotifier.TxsAvailable(): - cs.handleTxsAvailable() - case mi = <-cs.peerMsgQueue: - if err := cs.wal.Write(mi); err != nil { - cs.Logger.Error("Error writing to wal", "err", err) - } - // handles proposals, block parts, votes - // may generate internal events (votes, complete proposals, 2/3 majorities) - cs.handleMsg(mi) - case mi = <-cs.internalMsgQueue: - err := cs.wal.WriteSync(mi) // NOTE: fsync - if err != nil { - panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", mi, err)) - } - - if _, ok := mi.Msg.(*tmcon.VoteMessage); ok { - // we actually want to simulate failing during - // the previous WriteSync, but this isn't easy to do. - // Equivalent would be to fail here and manually remove - // some bytes from the end of the wal. - fail.Fail() // XXX - } - - // handles proposals, block parts, votes - cs.handleMsg(mi) - case ti := <-cs.timeoutTicker.Chan(): // tockChan: - if err := cs.wal.Write(ti); err != nil { - cs.Logger.Error("Error writing to wal", "err", err) - } - // if the timeout is relevant to the rs - // go to the next step - cs.handleTimeout(ti, rs) - case <-cs.Quit(): - onExit(cs) - return - } - } -} - -func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) { - cs.Logger.Debug("Received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step) - - // timeouts must be for current height, round, step - if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) { - cs.Logger.Debug("Ignoring tock because we're ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step) - return - } - - // the timeout will now cause a state transition - cs.mtx.Lock() - defer cs.mtx.Unlock() - - switch ti.Step { - case cstypes.RoundStepNewHeight: - // NewRound event fired from enterNewRound. - // XXX: should we fire timeout here (for timeout commit)? - cs.enterNewRound(ti.Height, 0) - case cstypes.RoundStepNewRound: - cs.enterPropose(ti.Height, 0) - case cstypes.RoundStepPropose: - if err := cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()); err != nil { - cs.Logger.Error("Error publishing timeout propose", "err", err) - } - cs.enterPrevote(ti.Height, ti.Round) - case cstypes.RoundStepPrevoteWait: - if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil { - cs.Logger.Error("Error publishing timeout wait", "err", err) - } - cs.enterPrecommit(ti.Height, ti.Round) - case cstypes.RoundStepPrecommitWait: - if err := cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()); err != nil { - cs.Logger.Error("Error publishing timeout wait", "err", err) - } - cs.enterPrecommit(ti.Height, ti.Round) - cs.enterNewRound(ti.Height, ti.Round+1) - default: - panic(fmt.Sprintf("Invalid timeout step: %v", ti.Step)) - } - -} - -func (cs *State) handleTxsAvailable() { - cs.mtx.Lock() - defer cs.mtx.Unlock() - - // We only need to do this for round 0. - if cs.Round != 0 { - return - } - - switch cs.Step { - case cstypes.RoundStepNewHeight: // timeoutCommit phase - if cs.needProofBlock(cs.Height) { - // enterPropose will be called by enterNewRound - return - } - - // +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight - timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond - cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound) - case cstypes.RoundStepNewRound: // after timeoutCommit - cs.enterPropose(cs.Height, 0) - } -} - -//----------------------------------------------------------------------------- -// State functions -// Used internally by handleTimeout and handleMsg to make state transitions - -// Enter: `timeoutNewHeight` by startTime (commitTime+timeoutCommit), -// or, if SkipTimeoutCommit==true, after receiving all precommits from (height,round-1) -// Enter: `timeoutPrecommits` after any +2/3 precommits from (height,round-1) -// Enter: +2/3 precommits for nil at (height,round-1) -// Enter: +2/3 prevotes any or +2/3 precommits for block or any from (height, round) -// NOTE: cs.StartTime was already set for height. -func (cs *State) enterNewRound(height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - if cs.Height != height || round < cs.Round || (cs.Round == round && cs.Step != cstypes.RoundStepNewHeight) { - logger.Debug(fmt.Sprintf( - "enterNewRound(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - round, - cs.Height, - cs.Round, - cs.Step)) - return - } - - if now := tmtime.Now(); cs.StartTime.After(now) { - logger.Debug("need to set a buffer and log message here for sanity", "startTime", cs.StartTime, "now", now) - } - - logger.Info(fmt.Sprintf("enterNewRound(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - // Select the current height and round Proposer - cs.Proposer = cs.Validators.SelectProposer(cs.state.LastProofHash, height, round) - - // Setup new round - // we don't fire newStep for this step, - // but we fire an event, so update the round step first - cs.updateRoundStep(round, cstypes.RoundStepNewRound) - if round == 0 { - // We've already reset these upon new height, - // and meanwhile we might have received a proposal - // for round 0. - } else { - logger.Info("Resetting Proposal info") - cs.Proposal = nil - cs.ProposalBlock = nil - cs.ProposalBlockParts = nil - } - cs.Votes.SetRound(tmmath.SafeAddInt32(round, 1)) // also track next round (round+1) to allow round-skipping - cs.TriggeredTimeoutPrecommit = false - - if err := cs.eventBus.PublishEventNewRound(cs.NewRoundEvent()); err != nil { - cs.Logger.Error("Error publishing new round", "err", err) - } - cs.metrics.Rounds.Set(float64(round)) - - // Wait for txs to be available in the mempool - // before we enterPropose in round 0. If the last block changed the app hash, - // we may need an empty "proof" block, and enterPropose immediately. - waitForTxs := cs.config.WaitForTxs() && round == 0 && !cs.needProofBlock(height) - if waitForTxs { - if cs.config.CreateEmptyBlocksInterval > 0 { - cs.scheduleTimeout(cs.config.CreateEmptyBlocksInterval, height, round, - cstypes.RoundStepNewRound) - } - } else { - cs.enterPropose(height, round) - } -} - -// needProofBlock returns true on the first height (so the genesis app hash is signed right away) -// and where the last block (height-1) caused the app hash to change -func (cs *State) needProofBlock(height int64) bool { - if height == cs.state.InitialHeight { - return true - } - - lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) - if lastBlockMeta == nil { - panic(fmt.Sprintf("needProofBlock: last block meta for height %d not found", height-1)) - } - return !bytes.Equal(cs.state.AppHash, lastBlockMeta.Header.AppHash) -} - -func (cs *State) isProposer(address []byte) bool { - return bytes.Equal(cs.Proposer.Address, address) -} - -func (cs *State) defaultDecideProposal(height int64, round int32) { - var block *types.Block - var blockParts *types.PartSet - - // Decide on block - if cs.ValidBlock != nil { - // If there is valid block, choose that. - block, blockParts = cs.ValidBlock, cs.ValidBlockParts - } else { - // Create a new proposal block from state/txs from the mempool. - block, blockParts = cs.createProposalBlock(round) - if block == nil { - return - } - } - - // Flush the WAL. Otherwise, we may not recompute the same proposal to sign, - // and the privValidator will refuse to sign anything. - if err := cs.wal.FlushAndSync(); err != nil { - cs.Logger.Error("Error flushing to disk") - } - - // Make proposal - propBlockID := types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()} - proposal := types.NewProposal(height, round, cs.ValidRound, propBlockID) - p := proposal.ToProto() - if err := cs.privValidator.SignProposal(cs.state.ChainID, p); err == nil { - proposal.Signature = p.Signature - - // send proposal and block parts on internal msg queue - cs.sendInternalMessage(msgInfo{&tmcon.ProposalMessage{Proposal: proposal}, ""}) - for i := 0; i < int(blockParts.Total()); i++ { - part := blockParts.GetPart(i) - cs.sendInternalMessage(msgInfo{&tmcon.BlockPartMessage{Height: cs.Height, Round: cs.Round, Part: part}, ""}) - } - cs.Logger.Info("Signed proposal", "height", height, "round", round, "proposal", proposal) - cs.Logger.Debug(fmt.Sprintf("Signed proposal block: %v", block)) - } else if !cs.replayMode { - cs.Logger.Error("enterPropose: Error signing proposal", "height", height, "round", round, "err", err) - } -} - -// Returns true if the proposal block is complete && -// (if POLRound was proposed, we have +2/3 prevotes from there). -func (cs *State) isProposalComplete() bool { - if cs.Proposal == nil || cs.ProposalBlock == nil { - return false - } - // we have the proposal. if there's a POLRound, - // make sure we have the prevotes from it too - if cs.Proposal.POLRound < 0 { - return true - } - // if this is false the proposer is lying or we haven't received the POL yet - return cs.Votes.Prevotes(cs.Proposal.POLRound).HasTwoThirdsMajority() - -} - -// Create the next block to propose and return it. Returns nil block upon error. -// -// We really only need to return the parts, but the block is returned for -// convenience so we can log the proposal block. -// -// NOTE: keep it side-effect free for clarity. -// CONTRACT: cs.privValidator is not nil. -func (cs *State) createProposalBlock(round int32) (block *types.Block, blockParts *types.PartSet) { - if cs.privValidator == nil { - panic("entered createProposalBlock with privValidator being nil") - } - - var commit *types.Commit - switch { - case cs.Height == cs.state.InitialHeight: - // We're creating a proposal for the first block. - // The commit is empty, but not nil. - commit = types.NewCommit(0, 0, types.BlockID{}, nil) - case cs.LastCommit.HasTwoThirdsMajority(): - // Make the commit from LastCommit - commit = cs.LastCommit.MakeCommit() - default: // This shouldn't happen. - cs.Logger.Error("enterPropose: Cannot propose anything: No commit for the previous block") - return - } - - if cs.privValidatorPubKey == nil { - // If this node is a validator & proposer in the current round, it will - // miss the opportunity to create a block. - cs.Logger.Error(fmt.Sprintf("enterPropose: %v", errPubKeyIsNotSet)) - return - } - proposerAddr := cs.privValidatorPubKey.Address() - - message := cs.state.MakeHashMessage(round) - proof, err := cs.privValidator.GenerateVRFProof(message) - if err != nil { - cs.Logger.Error(fmt.Sprintf("enterPropose: %v", err)) - return - } - return cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr, round, proof, 0) -} - -// Enter: any +2/3 prevotes at next round. -func (cs *State) enterPrevoteWait(height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - if cs.Height != height || round < cs.Round || (cs.Round == round && cstypes.RoundStepPrevoteWait <= cs.Step) { - logger.Debug(fmt.Sprintf( - "enterPrevoteWait(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - round, - cs.Height, - cs.Round, - cs.Step)) - return - } - if !cs.Votes.Prevotes(round).HasTwoThirdsAny() { - panic(fmt.Sprintf("enterPrevoteWait(%v/%v), but Prevotes does not have any +2/3 votes", height, round)) - } - - logger.Debug(fmt.Sprintf("enterPrevoteWait(%v/%v); current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - defer func() { - // Done enterPrevoteWait: - cs.updateRoundStep(round, cstypes.RoundStepPrevoteWait) - cs.newStep() - }() - - // Wait for some more prevotes; enterPrecommit - cs.scheduleTimeout(cs.config.Prevote(round), height, round, cstypes.RoundStepPrevoteWait) -} - -// Enter: any +2/3 precommits for next round. -func (cs *State) enterPrecommitWait(height int64, round int32) { - logger := cs.Logger.With("height", height, "round", round) - - if cs.Height != height || round < cs.Round || (cs.Round == round && cs.TriggeredTimeoutPrecommit) { - logger.Debug( - fmt.Sprintf( - "enterPrecommitWait(%v/%v): Invalid args. "+ - "Current state is Height/Round: %v/%v/, TriggeredTimeoutPrecommit:%v", - height, round, cs.Height, cs.Round, cs.TriggeredTimeoutPrecommit)) - return - } - if !cs.Votes.Precommits(round).HasTwoThirdsAny() { - panic(fmt.Sprintf("enterPrecommitWait(%v/%v), but Precommits does not have any +2/3 votes", height, round)) - } - logger.Info(fmt.Sprintf("enterPrecommitWait(%v/%v). Current: %v/%v/%v", height, round, cs.Height, cs.Round, cs.Step)) - - defer func() { - // Done enterPrecommitWait: - cs.TriggeredTimeoutPrecommit = true - cs.newStep() - }() - - // Wait for some more precommits; enterNewRound - cs.scheduleTimeout(cs.config.Precommit(round), height, round, cstypes.RoundStepPrecommitWait) -} - -// Enter: +2/3 precommits for block -func (cs *State) enterCommit(height int64, commitRound int32) { - logger := cs.Logger.With("height", height, "commitRound", commitRound) - - if cs.Height != height || cstypes.RoundStepCommit <= cs.Step { - logger.Debug(fmt.Sprintf( - "enterCommit(%v/%v): Invalid args. Current step: %v/%v/%v", - height, - commitRound, - cs.Height, - cs.Round, - cs.Step)) - return - } - logger.Info(fmt.Sprintf("enterCommit(%v/%v). Current: %v/%v/%v", height, commitRound, cs.Height, cs.Round, cs.Step)) - - defer func() { - // Done enterCommit: - // keep cs.Round the same, commitRound points to the right Precommits set. - cs.updateRoundStep(cs.Round, cstypes.RoundStepCommit) - cs.CommitRound = commitRound - cs.CommitTime = tmtime.Now() - cs.newStep() - - // Maybe finalize immediately. - cs.tryFinalizeCommit(height) - }() - - blockID, ok := cs.Votes.Precommits(commitRound).TwoThirdsMajority() - if !ok { - panic("RunActionCommit() expects +2/3 precommits") - } - - // The Locked* fields no longer matter. - // Move them over to ProposalBlock if they match the commit hash, - // otherwise they'll be cleared in updateToState. - if cs.LockedBlock.HashesTo(blockID.Hash) { - logger.Info("Commit is for locked block. Set ProposalBlock=LockedBlock", "blockHash", blockID.Hash) - cs.ProposalBlock = cs.LockedBlock - cs.ProposalBlockParts = cs.LockedBlockParts - } - - // If we don't have the block being committed, set up to get it. - if !cs.ProposalBlock.HashesTo(blockID.Hash) { - if !cs.ProposalBlockParts.HasHeader(blockID.PartSetHeader) { - logger.Info( - "commit is for a block we do not know about; set ProposalBlock=nil", - "proposal", cs.ProposalBlock.Hash(), - "commit", blockID.Hash, - ) - - // We're getting the wrong block. - // Set up ProposalBlockParts and keep waiting. - cs.ProposalBlock = nil - cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartSetHeader) - if err := cs.eventBus.PublishEventValidBlock(cs.RoundStateEvent()); err != nil { - cs.Logger.Error("Error publishing valid block", "err", err) - } - cs.evsw.FireEvent(types.EventValidBlock, &cs.RoundState) - } - // else { - // We just need to keep waiting. - // } - } -} - -// If we have the block AND +2/3 commits for it, finalize. -func (cs *State) tryFinalizeCommit(height int64) { - logger := cs.Logger.With("height", height) - - if cs.Height != height { - panic(fmt.Sprintf("tryFinalizeCommit() cs.Height: %v vs height: %v", cs.Height, height)) - } - - blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() - if !ok || len(blockID.Hash) == 0 { - logger.Error("Attempt to finalize failed. There was no +2/3 majority, or +2/3 was for .") - return - } - if !cs.ProposalBlock.HashesTo(blockID.Hash) { - // TODO: this happens every time if we're not a validator (ugly logs) - // TODO: ^^ wait, why does it matter that we're a validator? - logger.Debug( - "attempt to finalize failed; we do not have the commit block", - "proposal-block", cs.ProposalBlock.Hash(), - "commit-block", blockID.Hash, - ) - return - } - - cs.finalizeCommit(height) -} - -// Increment height and goto cstypes.RoundStepNewHeight -func (cs *State) finalizeCommit(height int64) { - if cs.Height != height || cs.Step != cstypes.RoundStepCommit { - cs.Logger.Debug(fmt.Sprintf( - "finalizeCommit(%v): Invalid args. Current step: %v/%v/%v", - height, - cs.Height, - cs.Round, - cs.Step)) - return - } - - blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() - block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts - - if !ok { - panic("Cannot finalizeCommit, commit does not have two thirds majority") - } - if !blockParts.HasHeader(blockID.PartSetHeader) { - panic("Expected ProposalBlockParts header to be commit header") - } - if !block.HashesTo(blockID.Hash) { - panic("Cannot finalizeCommit, ProposalBlock does not hash to commit hash") - } - if err := cs.blockExec.ValidateBlock(cs.state, cs.Round, block); err != nil { - panic(fmt.Errorf("+2/3 committed an invalid block: %w", err)) - } - - cs.Logger.Info("finalizing commit of block with N txs", - "height", block.Height, - "hash", block.Hash(), - "root", block.AppHash, - "N", len(block.Txs), - ) - cs.Logger.Debug(fmt.Sprintf("%v", block)) - - fail.Fail() // XXX - - // Save to blockStore. - if cs.blockStore.Height() < block.Height { - // NOTE: the seenCommit is local justification to commit this block, - // but may differ from the LastCommit included in the next block - precommits := cs.Votes.Precommits(cs.CommitRound) - seenCommit := precommits.MakeCommit() - cs.blockStore.SaveBlock(block, blockParts, seenCommit) - } else { - // Happens during replay if we already saved the block but didn't commit - cs.Logger.Debug("calling finalizeCommit on already stored block", "height", block.Height) - } - - fail.Fail() // XXX - - // Write EndHeightMessage{} for this height, implying that the blockstore - // has saved the block. - // - // If we crash before writing this EndHeightMessage{}, we will recover by - // running ApplyBlock during the ABCI handshake when we restart. If we - // didn't save the block to the blockstore before writing - // EndHeightMessage{}, we'd have to change WAL replay -- currently it - // complains about replaying for heights where an #ENDHEIGHT entry already - // exists. - // - // Either way, the State should not be resumed until we - // successfully call ApplyBlock (ie. later here, or in Handshake after - // restart). - endMsg := tmcon.EndHeightMessage{Height: height} - if err := cs.wal.WriteSync(endMsg); err != nil { // NOTE: fsync - panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", - endMsg, err)) - } - - fail.Fail() // XXX - - // Create a copy of the state for staging and an event cache for txs. - stateCopy := cs.state.Copy() - - // Execute and commit the block, update and save the state, and update the mempool. - // NOTE The block.AppHash wont reflect these txs until the next block. - var err error - var retainHeight int64 - stateCopy, retainHeight, err = cs.blockExec.ApplyBlock( - stateCopy, - types.BlockID{Hash: block.Hash(), PartSetHeader: blockParts.Header()}, - block, nil) - if err != nil { - cs.Logger.Error("Error on ApplyBlock", "err", err) - return - } - - fail.Fail() // XXX - - // Prune old heights, if requested by ABCI app. - if retainHeight > 0 { - pruned, err := cs.pruneBlocks(retainHeight) - if err != nil { - cs.Logger.Error("Failed to prune blocks", "retainHeight", retainHeight, "err", err) - } else { - cs.Logger.Info("Pruned blocks", "pruned", pruned, "retainHeight", retainHeight) - } - } - - // must be called before we update state - cs.recordMetrics(height, block) - - // NewHeightStep! - cs.updateToState(stateCopy) - - fail.Fail() // XXX - - // Private validator might have changed it's key pair => refetch pubkey. - if err := cs.updatePrivValidatorPubKey(); err != nil { - cs.Logger.Error("Can't get private validator pubkey", "err", err) - } - - // cs.StartTime is already set. - // Schedule Round0 to start soon. - cs.scheduleRound0(&cs.RoundState) - - // By here, - // * cs.Height has been increment to height+1 - // * cs.Step is now cstypes.RoundStepNewHeight - // * cs.StartTime is set to when we will start round0. -} - -func (cs *State) pruneBlocks(retainHeight int64) (uint64, error) { - base := cs.blockStore.Base() - if retainHeight <= base { - return 0, nil - } - pruned, err := cs.blockStore.PruneBlocks(retainHeight) - if err != nil { - return 0, fmt.Errorf("failed to prune block store: %w", err) - } - err = cs.blockExec.Store().PruneStates(base, retainHeight) - if err != nil { - return 0, fmt.Errorf("failed to prune state database: %w", err) - } - return pruned, nil -} - -func (cs *State) recordMetrics(height int64, block *types.Block) { - cs.metrics.Validators.Set(float64(cs.Validators.Size())) - cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) - cs.metrics.Voters.Set(float64(cs.Voters.Size())) - cs.metrics.VotersPower.Set(float64(cs.Voters.TotalVotingWeight())) - - var ( - missingVoters int - missingVotersPower int64 - ) - // height=0 -> MissingVoters and MissingVotersPower are both 0. - // Remember that the first LastCommit is intentionally empty, so it's not - // fair to increment missing voters number. - if height > cs.state.InitialHeight { - // Sanity check that commit size matches voter set size - only applies - // after first block. - var ( - commitSize = block.LastCommit.Size() - valSetLen = len(cs.LastVoters.Voters) - address types.Address - ) - if commitSize != valSetLen { - panic(fmt.Sprintf("commit size (%d) doesn't match valset length (%d) at height %d\n\n%v\n\n%v", - commitSize, valSetLen, block.Height, block.LastCommit.Signatures, cs.LastVoters.Voters)) - } - - if cs.privValidator != nil { - if cs.privValidatorPubKey == nil { - // Metrics won't be updated, but it's not critical. - cs.Logger.Error(fmt.Sprintf("recordMetrics: %v", errPubKeyIsNotSet)) - } else { - address = cs.privValidatorPubKey.Address() - } - } - - selectedAsVoter := false - if cs.privValidator != nil { - pubkey, err := cs.privValidator.GetPubKey() - if err != nil { - // Metrics won't be updated, but it's not critical. - cs.Logger.Error("Error on retrieval of pubkey", "err", err) - } else { - address = pubkey.Address() - } - } - - for i, val := range cs.LastVoters.Voters { - commitSig := block.LastCommit.Signatures[i] - if commitSig.Absent() { - missingVoters++ - missingVotersPower += val.VotingWeight - } - - if bytes.Equal(val.Address, address) { - label := []string{ - "validator_address", val.Address.String(), - } - cs.metrics.VoterPower.With(label...).Set(float64(val.VotingWeight)) - selectedAsVoter = true - if commitSig.ForBlock() { - cs.metrics.VoterLastSignedHeight.With(label...).Set(float64(height)) - } else { - cs.metrics.VoterMissedBlocks.With(label...).Add(float64(1)) - } - } - - } - if !selectedAsVoter { - address := "" - if cs.privValidator != nil { - pubKey, err := cs.privValidator.GetPubKey() - if err == nil && cs.Validators != nil && cs.Validators.HasAddress(pubKey.Address().Bytes()) { - address = pubKey.Address().String() - } - } - label := []string{ - "validator_address", address, - } - cs.metrics.VoterPower.With(label...).Set(float64(0)) - } - } - cs.metrics.MissingVoters.Set(float64(missingVoters)) - cs.metrics.MissingVotersPower.Set(float64(missingVotersPower)) - - // NOTE: byzantine voters power and count is only for consensus evidence i.e. duplicate vote - var ( - byzantineVotersPower = int64(0) - byzantineVotersCount = int64(0) - ) - for _, ev := range block.Evidence.Evidence { - if dve, ok := ev.(*types.DuplicateVoteEvidence); ok { - if _, val := cs.Voters.GetByAddress(dve.VoteA.ValidatorAddress); val != nil { - byzantineVotersCount++ - byzantineVotersPower += val.VotingWeight - } - } - } - cs.metrics.ByzantineVoters.Set(float64(byzantineVotersCount)) - cs.metrics.ByzantineVotersPower.Set(float64(byzantineVotersPower)) - - if height > 1 { - lastBlockMeta := cs.blockStore.LoadBlockMeta(height - 1) - if lastBlockMeta != nil { - cs.metrics.BlockIntervalSeconds.Set( - block.Time.Sub(lastBlockMeta.Header.Time).Seconds(), - ) - } - } - - cs.metrics.NumTxs.Set(float64(len(block.Data.Txs))) - cs.metrics.TotalTxs.Add(float64(len(block.Data.Txs))) - cs.metrics.BlockSizeBytes.Set(float64(block.Size())) - cs.metrics.CommittedHeight.Set(float64(block.Height)) -} - -//----------------------------------------------------------------------------- - -// NOTE: block is not necessarily valid. -// Asynchronously triggers either enterPrevote (before we timeout of propose) or tryFinalizeCommit, -// once we have the full block. -func (cs *State) addProposalBlockPart(msg *tmcon.BlockPartMessage, peerID p2p.ID) (added bool, err error) { - height, round, part := msg.Height, msg.Round, msg.Part - - // Blocks might be reused, so round mismatch is OK - if cs.Height != height { - cs.Logger.Debug("Received block part from wrong height", "height", height, "round", round) - return false, nil - } - - // We're not expecting a block part. - if cs.ProposalBlockParts == nil { - // NOTE: this can happen when we've gone to a higher round and - // then receive parts from the previous round - not necessarily a bad peer. - cs.Logger.Info("Received a block part when we're not expecting any", - "height", height, "round", round, "index", part.Index, "peer", peerID) - return false, nil - } - - added, err = cs.ProposalBlockParts.AddPart(part) - if err != nil { - return added, err - } - if cs.ProposalBlockParts.ByteSize() > cs.state.ConsensusParams.Block.MaxBytes { - return added, fmt.Errorf("total size of proposal block parts exceeds maximum block bytes (%d > %d)", - cs.ProposalBlockParts.ByteSize(), cs.state.ConsensusParams.Block.MaxBytes, - ) - } - if added && cs.ProposalBlockParts.IsComplete() { - bz, err := ioutil.ReadAll(cs.ProposalBlockParts.GetReader()) - if err != nil { - return added, err - } - - var pbb = new(tmproto.Block) - err = proto.Unmarshal(bz, pbb) - if err != nil { - return added, err - } - - block, err := types.BlockFromProto(pbb) - if err != nil { - return added, err - } - - cs.ProposalBlock = block - // NOTE: it's possible to receive complete proposal blocks for future rounds without having the proposal - cs.Logger.Info("Received complete proposal block", "height", cs.ProposalBlock.Height, "hash", cs.ProposalBlock.Hash()) - if err := cs.eventBus.PublishEventCompleteProposal(cs.CompleteProposalEvent()); err != nil { - cs.Logger.Error("Error publishing event complete proposal", "err", err) - } - - // Update Valid* if we can. - prevotes := cs.Votes.Prevotes(cs.Round) - blockID, hasTwoThirds := prevotes.TwoThirdsMajority() - if hasTwoThirds && !blockID.IsZero() && (cs.ValidRound < cs.Round) { - if cs.ProposalBlock.HashesTo(blockID.Hash) { - cs.Logger.Info("Updating valid block to new proposal block", - "valid-round", cs.Round, "valid-block-hash", cs.ProposalBlock.Hash()) - cs.ValidRound = cs.Round - cs.ValidBlock = cs.ProposalBlock - cs.ValidBlockParts = cs.ProposalBlockParts - } - // TODO: In case there is +2/3 majority in Prevotes set for some - // block and cs.ProposalBlock contains different block, either - // proposer is faulty or voting power of faulty processes is more - // than 1/3. We should trigger in the future accountability - // procedure at this point. - } - - if cs.Step <= cstypes.RoundStepPropose && cs.isProposalComplete() { - // Move onto the next step - cs.enterPrevote(height, cs.Round) - if hasTwoThirds { // this is optimisation as this will be triggered when prevote is added - cs.enterPrecommit(height, cs.Round) - } - } else if cs.Step == cstypes.RoundStepCommit { - // If we're waiting on the proposal block... - cs.tryFinalizeCommit(height) - } - return added, nil - } - return added, nil -} - -// Attempt to add the vote. if its a duplicate signature, dupeout the validator -func (cs *State) tryAddVote(vote *types.Vote, peerID p2p.ID) (bool, error) { - added, err := cs.addVote(vote, peerID) - if err != nil { - // If the vote height is off, we'll just ignore it, - // But if it's a conflicting sig, add it to the cs.evpool. - // If it's otherwise invalid, punish peer. - // nolint: gocritic - if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok { - if cs.privValidatorPubKey == nil { - return false, errPubKeyIsNotSet - } - - if bytes.Equal(vote.ValidatorAddress, cs.privValidatorPubKey.Address()) { - cs.Logger.Error( - "Found conflicting vote from ourselves. Did you unsafe_reset a validator?", - "height", - vote.Height, - "round", - vote.Round, - "type", - vote.Type) - return added, err - } - cs.evpool.ReportConflictingVotes(voteErr.VoteA, voteErr.VoteB) - return added, err - } else if err == types.ErrVoteNonDeterministicSignature { - cs.Logger.Debug("Vote has non-deterministic signature", "err", err) - } else { - // Either - // 1) bad peer OR - // 2) not a bad peer? this can also err sometimes with "Unexpected step" OR - // 3) tmkms use with multiple validators connecting to a single tmkms instance - // (https://github.com/tendermint/tendermint/issues/3839). - cs.Logger.Info("Error attempting to add vote", "err", err) - return added, ErrAddingVote - } - } - return added, nil -} - -//----------------------------------------------------------------------------- - -// CONTRACT: cs.privValidator is not nil. -func (cs *State) signVote( - msgType tmproto.SignedMsgType, - hash []byte, - header types.PartSetHeader, -) (*types.Vote, error) { - // Flush the WAL. Otherwise, we may not recompute the same vote to sign, - // and the privValidator will refuse to sign anything. - if err := cs.wal.FlushAndSync(); err != nil { - return nil, err - } - - if cs.privValidatorPubKey == nil { - return nil, errPubKeyIsNotSet - } - addr := cs.privValidatorPubKey.Address() - valIdx, _ := cs.Voters.GetByAddress(addr) - - vote := &types.Vote{ - ValidatorAddress: addr, - ValidatorIndex: valIdx, - Height: cs.Height, - Round: cs.Round, - Timestamp: cs.voteTime(), - Type: msgType, - BlockID: types.BlockID{Hash: hash, PartSetHeader: header}, - } - v := vote.ToProto() - err := cs.privValidator.SignVote(cs.state.ChainID, v) - vote.Signature = v.Signature - - return vote, err -} - -func (cs *State) voteTime() time.Time { - now := tmtime.Now() - minVoteTime := now - // TODO: We should remove next line in case we don't vote for v in case cs.ProposalBlock == nil, - // even if cs.LockedBlock != nil. See https://docs.tendermint.com/master/spec/. - timeIota := time.Duration(cs.state.ConsensusParams.Block.TimeIotaMs) * time.Millisecond - if cs.LockedBlock != nil { - // See the BFT time spec https://docs.tendermint.com/master/spec/consensus/bft-time.html - minVoteTime = cs.LockedBlock.Time.Add(timeIota) - } else if cs.ProposalBlock != nil { - minVoteTime = cs.ProposalBlock.Time.Add(timeIota) - } - - if now.After(minVoteTime) { - return now - } - return minVoteTime -} - -// sign the vote and publish on internalMsgQueue -func (cs *State) signAddVote(msgType tmproto.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote { - if cs.privValidator == nil { // the node does not have a key - return nil - } - - if cs.privValidatorPubKey == nil { - // Vote won't be signed, but it's not critical. - cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet)) - return nil - } - - // If the node not in the voter set, do nothing. - if !cs.Voters.HasAddress(cs.privValidatorPubKey.Address()) { - return nil - } - - // TODO: pass pubKey to signVote - vote, err := cs.signVote(msgType, hash, header) - if err == nil { - cs.sendInternalMessage(msgInfo{&tmcon.VoteMessage{Vote: vote}, ""}) - cs.Logger.Info("Signed and pushed vote", "height", cs.Height, "round", cs.Round, "vote", vote) - return vote - } - // if !cs.replayMode { - cs.Logger.Error("Error signing vote", "height", cs.Height, "round", cs.Round, "vote", vote, "err", err) - //} - return nil -} - -// updatePrivValidatorPubKey get's the private validator public key and -// memoizes it. This func returns an error if the private validator is not -// responding or responds with an error. -func (cs *State) updatePrivValidatorPubKey() error { - if cs.privValidator == nil { - return nil - } - - pubKey, err := cs.privValidator.GetPubKey() - if err != nil { - return err - } - cs.privValidatorPubKey = pubKey - return nil -} - -// look back to check existence of the node's consensus votes before joining consensus -func (cs *State) checkDoubleSigningRisk(height int64) error { - if cs.privValidator != nil && cs.privValidatorPubKey != nil && cs.config.DoubleSignCheckHeight > 0 && height > 0 { - valAddr := cs.privValidatorPubKey.Address() - doubleSignCheckHeight := cs.config.DoubleSignCheckHeight - if doubleSignCheckHeight > height { - doubleSignCheckHeight = height - } - for i := int64(1); i < doubleSignCheckHeight; i++ { - lastCommit := cs.blockStore.LoadSeenCommit(height - i) - if lastCommit != nil { - for sigIdx, s := range lastCommit.Signatures { - if s.BlockIDFlag == types.BlockIDFlagCommit && bytes.Equal(s.ValidatorAddress, valAddr) { - cs.Logger.Info("Found signature from the same key", "sig", s, "idx", sigIdx, "height", height-i) - return ErrSignatureFoundInPastBlocks - } - } - } - } - } - return nil -} - -//--------------------------------------------------------- - -func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int { - if h1 < h2 { - return -1 - } else if h1 > h2 { - return 1 - } - if r1 < r2 { - return -1 - } else if r1 > r2 { - return 1 - } - if s1 < s2 { - return -1 - } else if s1 > s2 { - return 1 - } - return 0 -} - -// repairWalFile decodes messages from src (until the decoder errors) and -// writes them to dst. -func repairWalFile(src, dst string) error { - in, err := os.Open(src) - if err != nil { - return err - } - defer in.Close() - - out, err := os.Open(dst) - if err != nil { - return err - } - defer out.Close() - - var ( - dec = NewWALDecoder(in) - enc = NewWALEncoder(out) - ) - - // best-case repair (until first error is encountered) - for { - msg, err := dec.Decode() - if err != nil { - break - } - - err = enc.Encode(msg) - if err != nil { - return fmt.Errorf("failed to encode msg: %w", err) - } - } - - return nil -} diff --git a/test/maverick/consensus/ticker.go b/test/maverick/consensus/ticker.go deleted file mode 100644 index 3761d1470..000000000 --- a/test/maverick/consensus/ticker.go +++ /dev/null @@ -1,134 +0,0 @@ -package consensus - -import ( - "time" - - "github.com/line/ostracon/libs/log" - "github.com/line/ostracon/libs/service" -) - -var ( - tickTockBufferSize = 10 -) - -// TimeoutTicker is a timer that schedules timeouts -// conditional on the height/round/step in the timeoutInfo. -// The timeoutInfo.Duration may be non-positive. -type TimeoutTicker interface { - Start() error - Stop() error - Chan() <-chan timeoutInfo // on which to receive a timeout - ScheduleTimeout(ti timeoutInfo) // reset the timer - - SetLogger(log.Logger) -} - -// timeoutTicker wraps time.Timer, -// scheduling timeouts only for greater height/round/step -// than what it's already seen. -// Timeouts are scheduled along the tickChan, -// and fired on the tockChan. -type timeoutTicker struct { - service.BaseService - - timer *time.Timer - tickChan chan timeoutInfo // for scheduling timeouts - tockChan chan timeoutInfo // for notifying about them -} - -// NewTimeoutTicker returns a new TimeoutTicker. -func NewTimeoutTicker() TimeoutTicker { - tt := &timeoutTicker{ - timer: time.NewTimer(0), - tickChan: make(chan timeoutInfo, tickTockBufferSize), - tockChan: make(chan timeoutInfo, tickTockBufferSize), - } - tt.BaseService = *service.NewBaseService(nil, "TimeoutTicker", tt) - tt.stopTimer() // don't want to fire until the first scheduled timeout - return tt -} - -// OnStart implements service.Service. It starts the timeout routine. -func (t *timeoutTicker) OnStart() error { - - go t.timeoutRoutine() - - return nil -} - -// OnStop implements service.Service. It stops the timeout routine. -func (t *timeoutTicker) OnStop() { - t.BaseService.OnStop() - t.stopTimer() -} - -// Chan returns a channel on which timeouts are sent. -func (t *timeoutTicker) Chan() <-chan timeoutInfo { - return t.tockChan -} - -// ScheduleTimeout schedules a new timeout by sending on the internal tickChan. -// The timeoutRoutine is always available to read from tickChan, so this won't block. -// The scheduling may fail if the timeoutRoutine has already scheduled a timeout for a later height/round/step. -func (t *timeoutTicker) ScheduleTimeout(ti timeoutInfo) { - t.tickChan <- ti -} - -//------------------------------------------------------------- - -// stop the timer and drain if necessary -func (t *timeoutTicker) stopTimer() { - // Stop() returns false if it was already fired or was stopped - if !t.timer.Stop() { - select { - case <-t.timer.C: - default: - t.Logger.Debug("Timer already stopped") - } - } -} - -// send on tickChan to start a new timer. -// timers are interupted and replaced by new ticks from later steps -// timeouts of 0 on the tickChan will be immediately relayed to the tockChan -func (t *timeoutTicker) timeoutRoutine() { - t.Logger.Debug("Starting timeout routine") - var ti timeoutInfo - for { - select { - case newti := <-t.tickChan: - t.Logger.Debug("Received tick", "old_ti", ti, "new_ti", newti) - - // ignore tickers for old height/round/step - if newti.Height < ti.Height { - continue - } else if newti.Height == ti.Height { - if newti.Round < ti.Round { - continue - } else if newti.Round == ti.Round { - if ti.Step > 0 && newti.Step <= ti.Step { - continue - } - } - } - - // stop the last timer - t.stopTimer() - - // update timeoutInfo and reset timer - // NOTE time.Timer allows duration to be non-positive - ti = newti - t.timer.Reset(ti.Duration) - t.Logger.Debug("Scheduled timeout", "dur", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step) - case <-t.timer.C: - t.Logger.Info("Timed out", "dur", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step) - // go routine here guarantees timeoutRoutine doesn't block. - // Determinism comes from playback in the receiveRoutine. - // We can eliminate it by merging the timeoutRoutine into receiveRoutine - // and managing the timeouts ourselves with a millisecond ticker - go func(toi timeoutInfo) { t.tockChan <- toi }(ti) - case <-t.Quit(): - return - } - } -} diff --git a/test/maverick/consensus/wal.go b/test/maverick/consensus/wal.go deleted file mode 100644 index 3640651ca..000000000 --- a/test/maverick/consensus/wal.go +++ /dev/null @@ -1,407 +0,0 @@ -package consensus - -import ( - "encoding/binary" - "errors" - "fmt" - "hash/crc32" - "io" - "path/filepath" - "time" - - "github.com/gogo/protobuf/proto" - - // tmjson "github.com/line/ostracon/libs/json" - tmcon "github.com/line/ostracon/consensus" - auto "github.com/line/ostracon/libs/autofile" - "github.com/line/ostracon/libs/log" - tmos "github.com/line/ostracon/libs/os" - "github.com/line/ostracon/libs/service" - tmcons "github.com/line/ostracon/proto/ostracon/consensus" - tmtime "github.com/line/ostracon/types/time" -) - -const ( - // time.Time + max consensus msg size - maxMsgSizeBytes = maxMsgSize + 24 - - // how often the WAL should be sync'd during period sync'ing - walDefaultFlushInterval = 2 * time.Second -) - -//-------------------------------------------------------- -// types and functions for savings consensus messages -// func init() { -// tmjson.RegisterType(msgInfo{}, "ostracon/wal/MsgInfo") -// tmjson.RegisterType(timeoutInfo{}, "ostracon/wal/TimeoutInfo") -// tmjson.RegisterType(tmcon.EndHeightMessage {}, "ostracon/wal/EndHeightMessage ") -// } - -// Write ahead logger writes msgs to disk before they are processed. -// Can be used for crash-recovery and deterministic replay. -// TODO: currently the wal is overwritten during replay catchup, give it a mode -// so it's either reading or appending - must read to end to start appending -// again. -type BaseWAL struct { - service.BaseService - - group *auto.Group - - enc *WALEncoder - - flushTicker *time.Ticker - flushInterval time.Duration -} - -var _ tmcon.WAL = &BaseWAL{} - -// NewWAL returns a new write-ahead logger based on `baseWAL`, which implements -// WAL. It's flushed and synced to disk every 2s and once when stopped. -func NewWAL(walFile string, groupOptions ...func(*auto.Group)) (*BaseWAL, error) { - err := tmos.EnsureDir(filepath.Dir(walFile), 0700) - if err != nil { - return nil, fmt.Errorf("failed to ensure WAL directory is in place: %w", err) - } - - group, err := auto.OpenGroup(walFile, groupOptions...) - if err != nil { - return nil, err - } - wal := &BaseWAL{ - group: group, - enc: NewWALEncoder(group), - flushInterval: walDefaultFlushInterval, - } - wal.BaseService = *service.NewBaseService(nil, "baseWAL", wal) - return wal, nil -} - -// SetFlushInterval allows us to override the periodic flush interval for the WAL. -func (wal *BaseWAL) SetFlushInterval(i time.Duration) { - wal.flushInterval = i -} - -func (wal *BaseWAL) Group() *auto.Group { - return wal.group -} - -func (wal *BaseWAL) SetLogger(l log.Logger) { - wal.BaseService.Logger = l - wal.group.SetLogger(l) -} - -func (wal *BaseWAL) OnStart() error { - size, err := wal.group.Head.Size() - if err != nil { - return err - } else if size == 0 { - if err := wal.WriteSync(tmcon.EndHeightMessage{Height: 0}); err != nil { - return err - } - } - err = wal.group.Start() - if err != nil { - return err - } - wal.flushTicker = time.NewTicker(wal.flushInterval) - go wal.processFlushTicks() - return nil -} - -func (wal *BaseWAL) processFlushTicks() { - for { - select { - case <-wal.flushTicker.C: - if err := wal.FlushAndSync(); err != nil { - wal.Logger.Error("Periodic WAL flush failed", "err", err) - } - case <-wal.Quit(): - return - } - } -} - -// FlushAndSync flushes and fsync's the underlying group's data to disk. -// See auto#FlushAndSync -func (wal *BaseWAL) FlushAndSync() error { - return wal.group.FlushAndSync() -} - -// Stop the underlying autofile group. -// Use Wait() to ensure it's finished shutting down -// before cleaning up files. -func (wal *BaseWAL) OnStop() { - wal.flushTicker.Stop() - if err := wal.FlushAndSync(); err != nil { - wal.Logger.Error("error on flush data to disk", "error", err) - } - if err := wal.group.Stop(); err != nil { - wal.Logger.Error("error trying to stop wal", "error", err) - } - wal.group.Close() -} - -// Wait for the underlying autofile group to finish shutting down -// so it's safe to cleanup files. -func (wal *BaseWAL) Wait() { - wal.group.Wait() -} - -// Write is called in newStep and for each receive on the -// peerMsgQueue and the timeoutTicker. -// NOTE: does not call fsync() -func (wal *BaseWAL) Write(msg tmcon.WALMessage) error { - if wal == nil { - return nil - } - - if err := wal.enc.Encode(&tmcon.TimedWALMessage{Time: tmtime.Now(), Msg: msg}); err != nil { - wal.Logger.Error("Error writing msg to consensus wal. WARNING: recover may not be possible for the current height", - "err", err, "msg", msg) - return err - } - - return nil -} - -// WriteSync is called when we receive a msg from ourselves -// so that we write to disk before sending signed messages. -// NOTE: calls fsync() -func (wal *BaseWAL) WriteSync(msg tmcon.WALMessage) error { - if wal == nil { - return nil - } - - if err := wal.Write(msg); err != nil { - return err - } - - if err := wal.FlushAndSync(); err != nil { - wal.Logger.Error(`WriteSync failed to flush consensus wal. - WARNING: may result in creating alternative proposals / votes for the current height iff the node restarted`, - "err", err) - return err - } - - return nil -} - -// WALSearchOptions are optional arguments to SearchForEndHeight. -type WALSearchOptions struct { - // IgnoreDataCorruptionErrors set to true will result in skipping data corruption errors. - IgnoreDataCorruptionErrors bool -} - -// SearchForEndHeight searches for the EndHeightMessage with the given height -// and returns an auto.GroupReader, whenever it was found or not and an error. -// Group reader will be nil if found equals false. -// -// CONTRACT: caller must close group reader. -func (wal *BaseWAL) SearchForEndHeight( - height int64, - options *tmcon.WALSearchOptions) (rd io.ReadCloser, found bool, err error) { - var ( - msg *tmcon.TimedWALMessage - gr *auto.GroupReader - ) - lastHeightFound := int64(-1) - - // NOTE: starting from the last file in the group because we're usually - // searching for the last height. See replay.go - min, max := wal.group.MinIndex(), wal.group.MaxIndex() - wal.Logger.Info("Searching for height", "height", height, "min", min, "max", max) - for index := max; index >= min; index-- { - gr, err = wal.group.NewReader(index) - if err != nil { - return nil, false, err - } - - dec := NewWALDecoder(gr) - for { - msg, err = dec.Decode() - if err == io.EOF { - // OPTIMISATION: no need to look for height in older files if we've seen h < height - if lastHeightFound > 0 && lastHeightFound < height { - gr.Close() - return nil, false, nil - } - // check next file - break - } - if options.IgnoreDataCorruptionErrors && IsDataCorruptionError(err) { - wal.Logger.Error("Corrupted entry. Skipping...", "err", err) - // do nothing - continue - } else if err != nil { - gr.Close() - return nil, false, err - } - - if m, ok := msg.Msg.(tmcon.EndHeightMessage); ok { - lastHeightFound = m.Height - if m.Height == height { // found - wal.Logger.Info("Found", "height", height, "index", index) - return gr, true, nil - } - } - } - gr.Close() - } - - return nil, false, nil -} - -// ///////////////////////////////////////////////////////////////////////////// - -// A WALEncoder writes custom-encoded WAL messages to an output stream. -// -// Format: 4 bytes CRC sum + 4 bytes length + arbitrary-length value -type WALEncoder struct { - wr io.Writer -} - -// NewWALEncoder returns a new encoder that writes to wr. -func NewWALEncoder(wr io.Writer) *WALEncoder { - return &WALEncoder{wr} -} - -// Encode writes the custom encoding of v to the stream. It returns an error if -// the encoded size of v is greater than 1MB. Any error encountered -// during the write is also returned. -func (enc *WALEncoder) Encode(v *tmcon.TimedWALMessage) error { - pbMsg, err := WALToProto(v.Msg) - if err != nil { - return err - } - pv := tmcons.TimedWALMessage{ - Time: v.Time, - Msg: pbMsg, - } - - data, err := proto.Marshal(&pv) - if err != nil { - panic(fmt.Errorf("encode timed wall message failure: %w", err)) - } - - crc := crc32.Checksum(data, crc32c) - length := uint32(len(data)) - if length > maxMsgSizeBytes { - return fmt.Errorf("msg is too big: %d bytes, max: %d bytes", length, maxMsgSizeBytes) - } - totalLength := 8 + int(length) - - msg := make([]byte, totalLength) - binary.BigEndian.PutUint32(msg[0:4], crc) - binary.BigEndian.PutUint32(msg[4:8], length) - copy(msg[8:], data) - - _, err = enc.wr.Write(msg) - return err -} - -// ///////////////////////////////////////////////////////////////////////////// - -// IsDataCorruptionError returns true if data has been corrupted inside WAL. -func IsDataCorruptionError(err error) bool { - _, ok := err.(DataCorruptionError) - return ok -} - -// DataCorruptionError is an error that occures if data on disk was corrupted. -type DataCorruptionError struct { - cause error -} - -func (e DataCorruptionError) Error() string { - return fmt.Sprintf("DataCorruptionError[%v]", e.cause) -} - -func (e DataCorruptionError) Cause() error { - return e.cause -} - -// A WALDecoder reads and decodes custom-encoded WAL messages from an input -// stream. See WALEncoder for the format used. -// -// It will also compare the checksums and make sure data size is equal to the -// length from the header. If that is not the case, error will be returned. -type WALDecoder struct { - rd io.Reader -} - -// NewWALDecoder returns a new decoder that reads from rd. -func NewWALDecoder(rd io.Reader) *WALDecoder { - return &WALDecoder{rd} -} - -// Decode reads the next custom-encoded value from its reader and returns it. -func (dec *WALDecoder) Decode() (*tmcon.TimedWALMessage, error) { - b := make([]byte, 4) - - _, err := dec.rd.Read(b) - if errors.Is(err, io.EOF) { - return nil, err - } - if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to read checksum: %v", err)} - } - crc := binary.BigEndian.Uint32(b) - - b = make([]byte, 4) - _, err = dec.rd.Read(b) - if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to read length: %v", err)} - } - length := binary.BigEndian.Uint32(b) - - if length > maxMsgSizeBytes { - return nil, DataCorruptionError{fmt.Errorf( - "length %d exceeded maximum possible value of %d bytes", - length, - maxMsgSizeBytes)} - } - - data := make([]byte, length) - n, err := dec.rd.Read(data) - if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to read data: %v (read: %d, wanted: %d)", err, n, length)} - } - - // check checksum before decoding data - actualCRC := crc32.Checksum(data, crc32c) - if actualCRC != crc { - return nil, DataCorruptionError{fmt.Errorf("checksums do not match: read: %v, actual: %v", crc, actualCRC)} - } - - var res = new(tmcons.TimedWALMessage) - err = proto.Unmarshal(data, res) - if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to decode data: %v", err)} - } - - walMsg, err := WALFromProto(res.Msg) - if err != nil { - return nil, DataCorruptionError{fmt.Errorf("failed to convert from proto: %w", err)} - } - tMsgWal := &tmcon.TimedWALMessage{ - Time: res.Time, - Msg: walMsg, - } - - return tMsgWal, err -} - -type nilWAL struct{} - -var _ tmcon.WAL = nilWAL{} - -func (nilWAL) Write(m tmcon.WALMessage) error { return nil } -func (nilWAL) WriteSync(m tmcon.WALMessage) error { return nil } -func (nilWAL) FlushAndSync() error { return nil } -func (nilWAL) SearchForEndHeight(height int64, - options *tmcon.WALSearchOptions) (rd io.ReadCloser, found bool, err error) { - return nil, false, nil -} -func (nilWAL) Start() error { return nil } -func (nilWAL) Stop() error { return nil } -func (nilWAL) Wait() {} diff --git a/test/maverick/consensus/wal_generator.go b/test/maverick/consensus/wal_generator.go deleted file mode 100644 index 83bdd3865..000000000 --- a/test/maverick/consensus/wal_generator.go +++ /dev/null @@ -1,235 +0,0 @@ -package consensus - -import ( - "bufio" - "bytes" - "fmt" - "io" - "path/filepath" - "testing" - "time" - - db "github.com/tendermint/tm-db" - - "github.com/line/ostracon/abci/example/kvstore" - cfg "github.com/line/ostracon/config" - tmcon "github.com/line/ostracon/consensus" - "github.com/line/ostracon/libs/log" - tmrand "github.com/line/ostracon/libs/rand" - "github.com/line/ostracon/privval" - "github.com/line/ostracon/proxy" - sm "github.com/line/ostracon/state" - "github.com/line/ostracon/store" - "github.com/line/ostracon/types" -) - -// WALGenerateNBlocks generates a consensus WAL. It does this by spinning up a -// stripped down version of node (proxy app, event bus, consensus state) with a -// persistent kvstore application and special consensus wal instance -// (byteBufferWAL) and waits until numBlocks are created. -// If the node fails to produce given numBlocks, it returns an error. -func WALGenerateNBlocks(t *testing.T, wr io.Writer, numBlocks int) (err error) { - config := getConfig(t) - - app := kvstore.NewPersistentKVStoreApplication(filepath.Join(config.DBDir(), "wal_generator")) - - logger := log.TestingLogger().With("wal_generator", "wal_generator") - logger.Info("generating WAL (last height msg excluded)", "numBlocks", numBlocks) - - // /////////////////////////////////////////////////////////////////////////// - // COPY PASTE FROM node.go WITH A FEW MODIFICATIONS - // NOTE: we can't import node package because of circular dependency. - // NOTE: we don't do handshake so need to set state.Version.Consensus.App directly. - privValidatorKeyFile := config.PrivValidatorKeyFile() - privValidatorStateFile := config.PrivValidatorStateFile() - privKeyType := config.PrivValidatorKeyType() - privValidator, err := privval.LoadOrGenFilePV(privValidatorKeyFile, privValidatorStateFile, privKeyType) - if err != nil { - return fmt.Errorf("failed to load FilePV: %w", err) - } - genDoc, err := types.GenesisDocFromFile(config.GenesisFile()) - if err != nil { - return fmt.Errorf("failed to read genesis file: %w", err) - } - blockStoreDB := db.NewMemDB() - stateDB := blockStoreDB - stateStore := sm.NewStore(stateDB) - state, err := sm.MakeGenesisState(genDoc) - if err != nil { - return fmt.Errorf("failed to make genesis state: %w", err) - } - state.ConsensusParams.Version.AppVersion = kvstore.ProtocolVersion - state.Version.Consensus.App = kvstore.ProtocolVersion - if err = stateStore.Save(state); err != nil { - t.Error(err) - } - - blockStore := store.NewBlockStore(blockStoreDB) - - proxyApp := proxy.NewAppConns(proxy.NewLocalClientCreator(app)) - proxyApp.SetLogger(logger.With("module", "proxy")) - if err := proxyApp.Start(); err != nil { - return fmt.Errorf("failed to start proxy app connections: %w", err) - } - t.Cleanup(func() { - if err := proxyApp.Stop(); err != nil { - t.Error(err) - } - }) - - eventBus := types.NewEventBus() - eventBus.SetLogger(logger.With("module", "events")) - if err := eventBus.Start(); err != nil { - return fmt.Errorf("failed to start event bus: %w", err) - } - t.Cleanup(func() { - if err := eventBus.Stop(); err != nil { - t.Error(err) - } - }) - mempool := emptyMempool{} - evpool := sm.EmptyEvidencePool{} - blockExec := sm.NewBlockExecutor(stateStore, log.TestingLogger(), proxyApp.Consensus(), mempool, evpool) - consensusState := NewState(config.Consensus, state.Copy(), - blockExec, blockStore, mempool, evpool, map[int64]Misbehavior{}) - consensusState.SetLogger(logger) - consensusState.SetEventBus(eventBus) - if privValidator != nil { - consensusState.SetPrivValidator(privValidator) - } - // END OF COPY PASTE - // /////////////////////////////////////////////////////////////////////////// - - // set consensus wal to buffered WAL, which will write all incoming msgs to buffer - numBlocksWritten := make(chan struct{}) - wal := newByteBufferWAL(logger, NewWALEncoder(wr), int64(numBlocks), numBlocksWritten) - // see wal.go#103 - if err := wal.Write(tmcon.EndHeightMessage{Height: 0}); err != nil { - t.Error(err) - } - - consensusState.wal = wal - - if err := consensusState.Start(); err != nil { - return fmt.Errorf("failed to start consensus state: %w", err) - } - - select { - case <-numBlocksWritten: - if err := consensusState.Stop(); err != nil { - t.Error(err) - } - return nil - case <-time.After(1 * time.Minute): - if err := consensusState.Stop(); err != nil { - t.Error(err) - } - return fmt.Errorf("waited too long for ostracon to produce %d blocks (grep logs for `wal_generator`)", numBlocks) - } -} - -// WALWithNBlocks returns a WAL content with numBlocks. -func WALWithNBlocks(t *testing.T, numBlocks int) (data []byte, err error) { - var b bytes.Buffer - wr := bufio.NewWriter(&b) - - if err := WALGenerateNBlocks(t, wr, numBlocks); err != nil { - return []byte{}, err - } - - wr.Flush() - return b.Bytes(), nil -} - -func randPort() int { - // returns between base and base + spread - base, spread := 20000, 20000 - return base + tmrand.Intn(spread) -} - -func makeAddrs() (string, string, string) { - start := randPort() - return fmt.Sprintf("tcp://127.0.0.1:%d", start), - fmt.Sprintf("tcp://127.0.0.1:%d", start+1), - fmt.Sprintf("tcp://127.0.0.1:%d", start+2) -} - -// getConfig returns a config for test cases -func getConfig(t *testing.T) *cfg.Config { - c := cfg.ResetTestRoot(t.Name()) - - // and we use random ports to run in parallel - tm, rpc, grpc := makeAddrs() - c.P2P.ListenAddress = tm - c.RPC.ListenAddress = rpc - c.RPC.GRPCListenAddress = grpc - return c -} - -// byteBufferWAL is a WAL which writes all msgs to a byte buffer. Writing stops -// when the heightToStop is reached. Client will be notified via -// signalWhenStopsTo channel. -type byteBufferWAL struct { - enc *WALEncoder - stopped bool - heightToStop int64 - signalWhenStopsTo chan<- struct{} - - logger log.Logger -} - -// needed for determinism -var fixedTime, _ = time.Parse(time.RFC3339, "2017-01-02T15:04:05Z") - -func newByteBufferWAL(logger log.Logger, enc *WALEncoder, nBlocks int64, signalStop chan<- struct{}) *byteBufferWAL { - return &byteBufferWAL{ - enc: enc, - heightToStop: nBlocks, - signalWhenStopsTo: signalStop, - logger: logger, - } -} - -// Save writes message to the internal buffer except when heightToStop is -// reached, in which case it will signal the caller via signalWhenStopsTo and -// skip writing. -func (w *byteBufferWAL) Write(m tmcon.WALMessage) error { - if w.stopped { - w.logger.Debug("WAL already stopped. Not writing message", "msg", m) - return nil - } - - if endMsg, ok := m.(tmcon.EndHeightMessage); ok { - w.logger.Debug("WAL write end height message", "height", endMsg.Height, "stopHeight", w.heightToStop) - if endMsg.Height == w.heightToStop { - w.logger.Debug("Stopping WAL at height", "height", endMsg.Height) - w.signalWhenStopsTo <- struct{}{} - w.stopped = true - return nil - } - } - - w.logger.Debug("WAL Write Message", "msg", m) - err := w.enc.Encode(&tmcon.TimedWALMessage{Time: fixedTime, Msg: m}) - if err != nil { - panic(fmt.Sprintf("failed to encode the msg %v", m)) - } - - return nil -} - -func (w *byteBufferWAL) WriteSync(m tmcon.WALMessage) error { - return w.Write(m) -} - -func (w *byteBufferWAL) FlushAndSync() error { return nil } - -func (w *byteBufferWAL) SearchForEndHeight( - height int64, - options *tmcon.WALSearchOptions) (rd io.ReadCloser, found bool, err error) { - return nil, false, nil -} - -func (w *byteBufferWAL) Start() error { return nil } -func (w *byteBufferWAL) Stop() error { return nil } -func (w *byteBufferWAL) Wait() {} diff --git a/test/maverick/main.go b/test/maverick/main.go deleted file mode 100644 index 3b01275ea..000000000 --- a/test/maverick/main.go +++ /dev/null @@ -1,245 +0,0 @@ -package main - -import ( - "fmt" - "os" - "path/filepath" - - "github.com/spf13/cobra" - "github.com/spf13/viper" - - cmd "github.com/line/ostracon/cmd/ostracon/commands" - "github.com/line/ostracon/cmd/ostracon/commands/debug" - cfg "github.com/line/ostracon/config" - "github.com/line/ostracon/libs/cli" - "github.com/line/ostracon/libs/log" - tmos "github.com/line/ostracon/libs/os" - tmrand "github.com/line/ostracon/libs/rand" - "github.com/line/ostracon/p2p" - cs "github.com/line/ostracon/test/maverick/consensus" - nd "github.com/line/ostracon/test/maverick/node" - "github.com/line/ostracon/types" - tmtime "github.com/line/ostracon/types/time" -) - -var ( - config = cfg.DefaultConfig() - logger = log.NewOCLogger(log.NewSyncWriter(os.Stdout)) - misbehaviorFlag = "" -) - -func init() { - registerFlagsRootCmd(RootCmd) -} - -func registerFlagsRootCmd(command *cobra.Command) { - command.PersistentFlags().String("log_level", config.LogLevel, "Log level") -} - -func ParseConfig() (*cfg.Config, error) { - conf := cfg.DefaultConfig() - err := viper.Unmarshal(conf) - if err != nil { - return nil, err - } - conf.SetRoot(conf.RootDir) - cfg.EnsureRoot(conf.RootDir) - if err = conf.ValidateBasic(); err != nil { - return nil, fmt.Errorf("error in config file: %v", err) - } - return conf, err -} - -// RootCmd is the root command for Ostracon core. -var RootCmd = &cobra.Command{ - Use: "maverick", - Short: "Ostracon Maverick Node", - Long: "Ostracon Maverick Node for testing with faulty consensus misbehaviors in a testnet. Contains " + - "all the functionality of a normal node but custom misbehaviors can be injected when running the node " + - "through a flag. See maverick node --help for how the misbehavior flag is constructured", - PersistentPreRunE: func(cmd *cobra.Command, args []string) (err error) { - fmt.Printf("use: %v, args: %v", cmd.Use, cmd.Args) - - config, err = ParseConfig() - if err != nil { - return err - } - - if config.LogFormat == cfg.LogFormatJSON { - logger = log.NewOCJSONLogger(log.NewSyncWriter(os.Stdout)) - } - - logger, err = log.ParseLogLevel(config.LogLevel, logger, cfg.DefaultLogLevel) - if err != nil { - return err - } - - if viper.GetBool(cli.TraceFlag) { - logger = log.NewTracingLogger(logger) - } - - logger = logger.With("module", "main") - return nil - }, -} - -func main() { - rootCmd := RootCmd - rootCmd.AddCommand( - ListMisbehaviorCmd, - cmd.GenValidatorCmd, - InitFilesCmd, - cmd.ProbeUpnpCmd, - cmd.ReplayCmd, - cmd.ReplayConsoleCmd, - cmd.ResetAllCmd, - cmd.ResetPrivValidatorCmd, - cmd.ShowValidatorCmd, - cmd.ShowNodeIDCmd, - cmd.GenNodeKeyCmd, - cmd.VersionCmd, - debug.DebugCmd, - cli.NewCompletionCmd(rootCmd, true), - ) - - nodeCmd := &cobra.Command{ - Use: "node", - Short: "Run the maverick node", - RunE: func(command *cobra.Command, args []string) error { - return startNode(config, logger, misbehaviorFlag) - }, - } - - cmd.AddNodeFlags(nodeCmd) - - // Create & start node - rootCmd.AddCommand(nodeCmd) - - // add special flag for misbehaviors - nodeCmd.Flags().StringVar( - &misbehaviorFlag, - "misbehaviors", - "", - "Select the misbehaviors of the node (comma-separated, no spaces in between): \n"+ - "e.g. --misbehaviors double-prevote,3\n"+ - "You can also have multiple misbehaviors: e.g. double-prevote,3,no-vote,5") - - userHome, err := os.UserHomeDir() - if err != nil { - panic(err) - } - cmd := cli.PrepareBaseCmd(rootCmd, "OC", filepath.Join(userHome, cfg.DefaultOstraconDir)) - if err := cmd.Execute(); err != nil { - panic(err) - } -} - -func startNode(config *cfg.Config, logger log.Logger, misbehaviorFlag string) error { - misbehaviors, err := nd.ParseMisbehaviors(misbehaviorFlag) - if err != nil { - return err - } - - node, err := nd.DefaultNewNode(config, logger, misbehaviors) - if err != nil { - return fmt.Errorf("failed to create node: %w", err) - } - - if err := node.Start(); err != nil { - return fmt.Errorf("failed to start node: %w", err) - } - - logger.Info("Started node", "nodeInfo", node.Switch().NodeInfo()) - - // Stop upon receiving SIGTERM or CTRL-C. - tmos.TrapSignal(logger, func() { - if node.IsRunning() { - if err := node.Stop(); err != nil { - logger.Error("unable to stop the node", "error", err) - } - } - }) - - // Run forever. - select {} -} - -var InitFilesCmd = &cobra.Command{ - Use: "init", - Short: "Initialize Ostracon", - RunE: initFiles, -} - -func initFiles(cmd *cobra.Command, args []string) error { - return initFilesWithConfig(config) -} - -func initFilesWithConfig(config *cfg.Config) error { - // private validator - privValKeyFile := config.PrivValidatorKeyFile() - privValStateFile := config.PrivValidatorStateFile() - var pv *nd.FilePV - if tmos.FileExists(privValKeyFile) { - pv = nd.LoadFilePV(privValKeyFile, privValStateFile) - logger.Info("Found private validator", "keyFile", privValKeyFile, - "stateFile", privValStateFile) - } else { - pv, _ = nd.GenFilePV(privValKeyFile, privValStateFile, config.PrivKeyType) - pv.Save() - logger.Info("Generated private validator", "keyFile", privValKeyFile, - "stateFile", privValStateFile) - } - - nodeKeyFile := config.NodeKeyFile() - if tmos.FileExists(nodeKeyFile) { - logger.Info("Found node key", "path", nodeKeyFile) - } else { - if _, err := p2p.LoadOrGenNodeKey(nodeKeyFile); err != nil { - return err - } - logger.Info("Generated node key", "path", nodeKeyFile) - } - - // genesis file - genFile := config.GenesisFile() - if tmos.FileExists(genFile) { - logger.Info("Found genesis file", "path", genFile) - } else { - genDoc := types.GenesisDoc{ - ChainID: fmt.Sprintf("test-chain-%v", tmrand.Str(6)), - GenesisTime: tmtime.Now(), - ConsensusParams: types.DefaultConsensusParams(), - } - pubKey, err := pv.GetPubKey() - if err != nil { - return fmt.Errorf("can't get pubkey: %w", err) - } - genDoc.Validators = []types.GenesisValidator{{ - Address: pubKey.Address(), - PubKey: pubKey, - Power: 10, - }} - - if err := genDoc.SaveAs(genFile); err != nil { - return err - } - logger.Info("Generated genesis file", "path", genFile) - } - - return nil -} - -var ListMisbehaviorCmd = &cobra.Command{ - Use: "misbehaviors", - Short: "Lists possible misbehaviors", - RunE: listMisbehaviors, -} - -func listMisbehaviors(cmd *cobra.Command, args []string) error { - str := "Currently registered misbehaviors: \n" - for key := range cs.MisbehaviorList { - str += fmt.Sprintf("- %s\n", key) - } - fmt.Println(str) - return nil -} diff --git a/test/maverick/node/node.go b/test/maverick/node/node.go deleted file mode 100644 index 4baa95d3e..000000000 --- a/test/maverick/node/node.go +++ /dev/null @@ -1,1500 +0,0 @@ -package node - -import ( - "bytes" - "context" - "errors" - "fmt" - "net" - "net/http" - _ "net/http/pprof" // nolint: gosec // securely exposed on separate, optional port - "strconv" - "strings" - "time" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/rs/cors" - - dbm "github.com/tendermint/tm-db" - - abci "github.com/line/ostracon/abci/types" - bcv0 "github.com/line/ostracon/blockchain/v0" - bcv1 "github.com/line/ostracon/blockchain/v1" - bcv2 "github.com/line/ostracon/blockchain/v2" - cfg "github.com/line/ostracon/config" - "github.com/line/ostracon/consensus" - "github.com/line/ostracon/crypto" - "github.com/line/ostracon/evidence" - tmjson "github.com/line/ostracon/libs/json" - "github.com/line/ostracon/libs/log" - tmpubsub "github.com/line/ostracon/libs/pubsub" - "github.com/line/ostracon/libs/service" - "github.com/line/ostracon/light" - mempl "github.com/line/ostracon/mempool" - "github.com/line/ostracon/p2p" - "github.com/line/ostracon/p2p/pex" - "github.com/line/ostracon/privval" - "github.com/line/ostracon/proxy" - rpccore "github.com/line/ostracon/rpc/core" - grpccore "github.com/line/ostracon/rpc/grpc" - rpcserver "github.com/line/ostracon/rpc/jsonrpc/server" - sm "github.com/line/ostracon/state" - "github.com/line/ostracon/state/indexer" - blockidxkv "github.com/line/ostracon/state/indexer/block/kv" - blockidxnull "github.com/line/ostracon/state/indexer/block/null" - "github.com/line/ostracon/state/txindex" - "github.com/line/ostracon/state/txindex/kv" - "github.com/line/ostracon/state/txindex/null" - "github.com/line/ostracon/statesync" - "github.com/line/ostracon/store" - cs "github.com/line/ostracon/test/maverick/consensus" - "github.com/line/ostracon/types" - tmtime "github.com/line/ostracon/types/time" - "github.com/line/ostracon/version" -) - -//------------------------------------------------------------------------------ - -// ParseMisbehaviors is a util function that converts a comma separated string into -// a map of misbehaviors to be executed by the maverick node -func ParseMisbehaviors(str string) (map[int64]cs.Misbehavior, error) { - // check if string is empty in which case we run a normal node - var misbehaviors = make(map[int64]cs.Misbehavior) - if str == "" { - return misbehaviors, nil - } - strs := strings.Split(str, ",") - if len(strs)%2 != 0 { - return misbehaviors, errors.New("missing either height or misbehavior name in the misbehavior flag") - } -OUTER_LOOP: - for i := 0; i < len(strs); i += 2 { - height, err := strconv.ParseInt(strs[i+1], 10, 64) - if err != nil { - return misbehaviors, fmt.Errorf("failed to parse misbehavior height: %w", err) - } - for key, misbehavior := range cs.MisbehaviorList { - if key == strs[i] { - misbehaviors[height] = misbehavior - continue OUTER_LOOP - } - } - return misbehaviors, fmt.Errorf("received unknown misbehavior: %s. Did you forget to add it?", strs[i]) - } - - return misbehaviors, nil -} - -// DBContext specifies config information for loading a new DB. -type DBContext struct { - ID string - Config *cfg.Config -} - -// DBProvider takes a DBContext and returns an instantiated DB. -type DBProvider func(*DBContext) (dbm.DB, error) - -// DefaultDBProvider returns a database using the DBBackend and DBDir -// specified in the ctx.Config. -func DefaultDBProvider(ctx *DBContext) (dbm.DB, error) { - dbType := dbm.BackendType(ctx.Config.DBBackend) - return dbm.NewDB(ctx.ID, dbType, ctx.Config.DBDir()) -} - -// GenesisDocProvider returns a GenesisDoc. -// It allows the GenesisDoc to be pulled from sources other than the -// filesystem, for instance from a distributed key-value store cluster. -type GenesisDocProvider func() (*types.GenesisDoc, error) - -// DefaultGenesisDocProviderFunc returns a GenesisDocProvider that loads -// the GenesisDoc from the config.GenesisFile() on the filesystem. -func DefaultGenesisDocProviderFunc(config *cfg.Config) GenesisDocProvider { - return func() (*types.GenesisDoc, error) { - return types.GenesisDocFromFile(config.GenesisFile()) - } -} - -// Provider takes a config and a logger and returns a ready to go Node. -type Provider func(*cfg.Config, log.Logger) (*Node, error) - -// DefaultNewNode returns an Ostracon node with default settings for the -// PrivValidator, ClientCreator, GenesisDoc, and DBProvider. -// It implements NodeProvider. -func DefaultNewNode(config *cfg.Config, logger log.Logger, misbehaviors map[int64]cs.Misbehavior) (*Node, error) { - nodeKey, err := p2p.LoadOrGenNodeKey(config.NodeKeyFile()) - if err != nil { - return nil, fmt.Errorf("failed to load or gen node key %s: %w", config.NodeKeyFile(), err) - } - - privKey, err := LoadOrGenFilePV( - config.PrivValidatorKeyFile(), - config.PrivValidatorStateFile(), - config.PrivValidatorKeyType()) - if err != nil { - return nil, fmt.Errorf("failed to create a private key: %s", err) - } - - return NewNode(config, - privKey, - nodeKey, - proxy.DefaultClientCreator(config.ProxyApp, config.ABCI, config.DBDir()), - DefaultGenesisDocProviderFunc(config), - DefaultDBProvider, - DefaultMetricsProvider(config.Instrumentation), - logger, - misbehaviors, - ) - -} - -// MetricsProvider returns a consensus, p2p and mempool Metrics. -type MetricsProvider func(chainID string) (*consensus.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) - -// DefaultMetricsProvider returns Metrics build using Prometheus client library -// if Prometheus is enabled. Otherwise, it returns no-op Metrics. -func DefaultMetricsProvider(config *cfg.InstrumentationConfig) MetricsProvider { - return func(chainID string) (*consensus.Metrics, *p2p.Metrics, *mempl.Metrics, *sm.Metrics) { - if config.Prometheus { - return consensus.PrometheusMetrics(config.Namespace, "chain_id", chainID), - p2p.PrometheusMetrics(config.Namespace, "chain_id", chainID), - mempl.PrometheusMetrics(config.Namespace, "chain_id", chainID), - sm.PrometheusMetrics(config.Namespace, "chain_id", chainID) - } - return consensus.NopMetrics(), p2p.NopMetrics(), mempl.NopMetrics(), sm.NopMetrics() - } -} - -// Option sets a parameter for the node. -type Option func(*Node) - -// Temporary interface for switching to fast sync, we should get rid of v0 and v1 reactors. -// See: https://github.com/tendermint/tendermint/issues/4595 -type fastSyncReactor interface { - SwitchToFastSync(sm.State) error -} - -// CustomReactors allows you to add custom reactors (name -> p2p.Reactor) to -// the node's Switch. -// -// WARNING: using any name from the below list of the existing reactors will -// result in replacing it with the custom one. -// -// - MEMPOOL -// - BLOCKCHAIN -// - CONSENSUS -// - EVIDENCE -// - PEX -// - STATESYNC -func CustomReactors(reactors map[string]p2p.Reactor) Option { - return func(n *Node) { - for name, reactor := range reactors { - if existingReactor := n.sw.Reactor(name); existingReactor != nil { - n.sw.Logger.Info("Replacing existing reactor with a custom one", - "name", name, "existing", existingReactor, "custom", reactor) - n.sw.RemoveReactor(name, existingReactor) - } - n.sw.AddReactor(name, reactor) - } - } -} - -func CustomReactorsAsConstructors(reactors map[string]func(n *Node) p2p.Reactor) Option { - return func(n *Node) { - for name, customReactor := range reactors { - if existingReactor := n.sw.Reactor(name); existingReactor != nil { - n.sw.Logger.Info("Replacing existing reactor with a custom one", - "name", name) - n.sw.RemoveReactor(name, existingReactor) - } - n.sw.AddReactor(name, customReactor(n)) - } - } -} - -// StateProvider overrides the state provider used by state sync to retrieve trusted app hashes and -// build a State object for bootstrapping the node. -// WARNING: this interface is considered unstable and subject to change. -func StateProvider(stateProvider statesync.StateProvider) Option { - return func(n *Node) { - n.stateSyncProvider = stateProvider - } -} - -//------------------------------------------------------------------------------ - -// Node is the highest level interface to a full Ostracon node. -// It includes all configuration information and running services. -type Node struct { - service.BaseService - - // config - config *cfg.Config - genesisDoc *types.GenesisDoc // initial validator set - privValidator types.PrivValidator // local node's validator key - - // network - transport *p2p.MultiplexTransport - sw *p2p.Switch // p2p connections - addrBook pex.AddrBook // known peers - nodeInfo p2p.NodeInfo - nodeKey *p2p.NodeKey // our node privkey - isListening bool - - // services - eventBus *types.EventBus // pub/sub for services - stateStore sm.Store - blockStore *store.BlockStore // store the blockchain to disk - bcReactor p2p.Reactor // for fast-syncing - mempoolReactor *mempl.Reactor // for gossipping transactions - mempool mempl.Mempool - stateSync bool // whether the node should state sync on startup - stateSyncReactor *statesync.Reactor // for hosting and restoring state sync snapshots - stateSyncProvider statesync.StateProvider // provides state data for bootstrapping a node - stateSyncGenesis sm.State // provides the genesis state for state sync - consensusState *cs.State // latest consensus state - consensusReactor *cs.Reactor // for participating in the consensus - pexReactor *pex.Reactor // for exchanging peer addresses - evidencePool *evidence.Pool // tracking evidence - proxyApp proxy.AppConns // connection to the application - rpcListeners []net.Listener // rpc servers - txIndexer txindex.TxIndexer - blockIndexer indexer.BlockIndexer - indexerService *txindex.IndexerService - prometheusSrv *http.Server -} - -func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.BlockStore, stateDB dbm.DB, err error) { - var blockStoreDB dbm.DB - blockStoreDB, err = dbProvider(&DBContext{"blockstore", config}) - if err != nil { - return - } - blockStore = store.NewBlockStore(blockStoreDB) - - stateDB, err = dbProvider(&DBContext{"state", config}) - if err != nil { - return - } - - return -} - -func createAndStartProxyAppConns(clientCreator proxy.ClientCreator, logger log.Logger) (proxy.AppConns, error) { - proxyApp := proxy.NewAppConns(clientCreator) - proxyApp.SetLogger(logger.With("module", "proxy")) - if err := proxyApp.Start(); err != nil { - return nil, fmt.Errorf("error starting proxy app connections: %v", err) - } - return proxyApp, nil -} - -func createAndStartEventBus(logger log.Logger) (*types.EventBus, error) { - eventBus := types.NewEventBus() - eventBus.SetLogger(logger.With("module", "events")) - if err := eventBus.Start(); err != nil { - return nil, err - } - return eventBus, nil -} - -func createAndStartIndexerService( - config *cfg.Config, - dbProvider DBProvider, - eventBus *types.EventBus, - logger log.Logger, -) (*txindex.IndexerService, txindex.TxIndexer, indexer.BlockIndexer, error) { - - var ( - txIndexer txindex.TxIndexer - blockIndexer indexer.BlockIndexer - ) - - switch config.TxIndex.Indexer { - case "kv": - store, err := dbProvider(&DBContext{"tx_index", config}) - if err != nil { - return nil, nil, nil, err - } - - txIndexer = kv.NewTxIndex(store) - blockIndexer = blockidxkv.New(dbm.NewPrefixDB(store, []byte("block_events"))) - default: - txIndexer = &null.TxIndex{} - blockIndexer = &blockidxnull.BlockerIndexer{} - } - - indexerService := txindex.NewIndexerService(txIndexer, blockIndexer, eventBus) - indexerService.SetLogger(logger.With("module", "txindex")) - - if err := indexerService.Start(); err != nil { - return nil, nil, nil, err - } - - return indexerService, txIndexer, blockIndexer, nil -} - -func doHandshake( - stateStore sm.Store, - state sm.State, - blockStore sm.BlockStore, - genDoc *types.GenesisDoc, - eventBus types.BlockEventPublisher, - proxyApp proxy.AppConns, - consensusLogger log.Logger) error { - - handshaker := cs.NewHandshaker(stateStore, state, blockStore, genDoc) - handshaker.SetLogger(consensusLogger) - handshaker.SetEventBus(eventBus) - if err := handshaker.Handshake(proxyApp); err != nil { - return fmt.Errorf("error during handshake: %v", err) - } - return nil -} - -func logNodeStartupInfo(state sm.State, pubKey crypto.PubKey, logger, consensusLogger log.Logger) { - // Log the version info. - logger.Info("Version info", - "software", version.OCCoreSemVer, - "abci", version.ABCIVersion, - "app", version.AppProtocol, - "block", version.BlockProtocol, - "p2p", version.P2PProtocol, - ) - - // If the state and software differ in block version, at least log it. - if state.Version.Consensus.Block != version.BlockProtocol { - logger.Info("Software and state have different block protocols", - "software", version.BlockProtocol, - "state", state.Version.Consensus.Block, - ) - } - - addr := pubKey.Address() - // Log whether this node is a validator or an observer - if state.Validators.HasAddress(addr) { - consensusLogger.Info("This node is a validator", "addr", addr, "pubKey", pubKey) - } else { - consensusLogger.Info("This node is not a validator", "addr", addr, "pubKey", pubKey) - } -} - -func onlyValidatorIsUs(state sm.State, pubKey crypto.PubKey) bool { - if state.Validators.Size() > 1 { - return false - } - addr, _ := state.Validators.GetByIndex(0) - return bytes.Equal(pubKey.Address(), addr) -} - -func createMempoolAndMempoolReactor(config *cfg.Config, proxyApp proxy.AppConns, - state sm.State, memplMetrics *mempl.Metrics, logger log.Logger) (*mempl.Reactor, *mempl.CListMempool) { - - mempool := mempl.NewCListMempool( - config.Mempool, - proxyApp.Mempool(), - state.LastBlockHeight, - mempl.WithMetrics(memplMetrics), - mempl.WithPreCheck(sm.TxPreCheck(state)), - mempl.WithPostCheck(sm.TxPostCheck(state)), - ) - mempoolLogger := logger.With("module", "mempool") - mempoolReactor := mempl.NewReactor(config.Mempool, config.P2P.RecvAsync, config.P2P.MempoolRecvBufSize, mempool) - mempoolReactor.SetLogger(mempoolLogger) - - if config.Consensus.WaitForTxs() { - mempool.EnableTxsAvailable() - } - return mempoolReactor, mempool -} - -func createEvidenceReactor(config *cfg.Config, dbProvider DBProvider, - stateDB dbm.DB, blockStore *store.BlockStore, logger log.Logger) (*evidence.Reactor, *evidence.Pool, error) { - - evidenceDB, err := dbProvider(&DBContext{"evidence", config}) - if err != nil { - return nil, nil, err - } - evidenceLogger := logger.With("module", "evidence") - evidencePool, err := evidence.NewPool(evidenceDB, sm.NewStore(stateDB), blockStore) - if err != nil { - return nil, nil, err - } - evidenceReactor := evidence.NewReactor(evidencePool, config.P2P.RecvAsync, config.P2P.EvidenceRecvBufSize) - evidenceReactor.SetLogger(evidenceLogger) - return evidenceReactor, evidencePool, nil -} - -func createBlockchainReactor(config *cfg.Config, - state sm.State, - blockExec *sm.BlockExecutor, - blockStore *store.BlockStore, - fastSync bool, - logger log.Logger) (bcReactor p2p.Reactor, err error) { - - switch config.FastSync.Version { - case "v0": - bcReactor = bcv0.NewBlockchainReactor( - state.Copy(), - blockExec, - blockStore, - fastSync, - config.P2P.RecvAsync, - config.P2P.BlockchainRecvBufSize, - ) - case "v1": - bcReactor = bcv1.NewBlockchainReactor( - state.Copy(), - blockExec, - blockStore, - fastSync, - config.P2P.RecvAsync, - config.P2P.BlockchainRecvBufSize, - ) - case "v2": - bcReactor = bcv2.NewBlockchainReactor( - state.Copy(), - blockExec, - blockStore, - fastSync, - ) - default: - return nil, fmt.Errorf("unknown fastsync version %s", config.FastSync.Version) - } - - bcReactor.SetLogger(logger.With("module", "blockchain")) - return bcReactor, nil -} - -func createConsensusReactor(config *cfg.Config, - state sm.State, - blockExec *sm.BlockExecutor, - blockStore sm.BlockStore, - mempool *mempl.CListMempool, - evidencePool *evidence.Pool, - privValidator types.PrivValidator, - csMetrics *consensus.Metrics, - waitSync bool, - eventBus *types.EventBus, - consensusLogger log.Logger, - misbehaviors map[int64]cs.Misbehavior) (*cs.Reactor, *cs.State) { - - consensusState := cs.NewState( - config.Consensus, - state.Copy(), - blockExec, - blockStore, - mempool, - evidencePool, - misbehaviors, - cs.StateMetrics(csMetrics), - ) - consensusState.SetLogger(consensusLogger) - if privValidator != nil { - consensusState.SetPrivValidator(privValidator) - } - consensusReactor := cs.NewReactor(consensusState, waitSync, config.P2P.RecvAsync, config.P2P.ConsensusRecvBufSize, - cs.ReactorMetrics(csMetrics)) - consensusReactor.SetLogger(consensusLogger) - // services which will be publishing and/or subscribing for messages (events) - // consensusReactor will set it on consensusState and blockExecutor - consensusReactor.SetEventBus(eventBus) - return consensusReactor, consensusState -} - -func createTransport( - config *cfg.Config, - nodeInfo p2p.NodeInfo, - nodeKey *p2p.NodeKey, - proxyApp proxy.AppConns, -) ( - *p2p.MultiplexTransport, - []p2p.PeerFilterFunc, -) { - var ( - mConnConfig = p2p.MConnConfig(config.P2P) - transport = p2p.NewMultiplexTransport(nodeInfo, *nodeKey, mConnConfig) - connFilters = []p2p.ConnFilterFunc{} - peerFilters = []p2p.PeerFilterFunc{} - ) - - if !config.P2P.AllowDuplicateIP { - connFilters = append(connFilters, p2p.ConnDuplicateIPFilter()) - } - - // Filter peers by addr or pubkey with an ABCI query. - // If the query return code is OK, add peer. - if config.FilterPeers { - connFilters = append( - connFilters, - // ABCI query for address filtering. - func(_ p2p.ConnSet, c net.Conn, _ []net.IP) error { - res, err := proxyApp.Query().QuerySync(abci.RequestQuery{ - Path: fmt.Sprintf("/p2p/filter/addr/%s", c.RemoteAddr().String()), - }) - if err != nil { - return err - } - if res.IsErr() { - return fmt.Errorf("error querying abci app: %v", res) - } - - return nil - }, - ) - - peerFilters = append( - peerFilters, - // ABCI query for ID filtering. - func(_ p2p.IPeerSet, p p2p.Peer) error { - res, err := proxyApp.Query().QuerySync(abci.RequestQuery{ - Path: fmt.Sprintf("/p2p/filter/id/%s", p.ID()), - }) - if err != nil { - return err - } - if res.IsErr() { - return fmt.Errorf("error querying abci app: %v", res) - } - - return nil - }, - ) - } - - p2p.MultiplexTransportConnFilters(connFilters...)(transport) - - // Limit the number of incoming connections. - max := config.P2P.MaxNumInboundPeers + len(splitAndTrimEmpty(config.P2P.UnconditionalPeerIDs, ",", " ")) - p2p.MultiplexTransportMaxIncomingConnections(max)(transport) - - return transport, peerFilters -} - -func createSwitch(config *cfg.Config, - transport p2p.Transport, - p2pMetrics *p2p.Metrics, - peerFilters []p2p.PeerFilterFunc, - mempoolReactor *mempl.Reactor, - bcReactor p2p.Reactor, - stateSyncReactor *statesync.Reactor, - consensusReactor *cs.Reactor, - evidenceReactor *evidence.Reactor, - nodeInfo p2p.NodeInfo, - nodeKey *p2p.NodeKey, - p2pLogger log.Logger) *p2p.Switch { - - sw := p2p.NewSwitch( - config.P2P, - transport, - p2p.WithMetrics(p2pMetrics), - p2p.SwitchPeerFilters(peerFilters...), - ) - sw.SetLogger(p2pLogger) - sw.AddReactor("MEMPOOL", mempoolReactor) - sw.AddReactor("BLOCKCHAIN", bcReactor) - sw.AddReactor("CONSENSUS", consensusReactor) - sw.AddReactor("EVIDENCE", evidenceReactor) - sw.AddReactor("STATESYNC", stateSyncReactor) - - sw.SetNodeInfo(nodeInfo) - sw.SetNodeKey(nodeKey) - - p2pLogger.Info("P2P Node ID", "ID", nodeKey.ID(), "file", config.NodeKeyFile()) - return sw -} - -func createAddrBookAndSetOnSwitch(config *cfg.Config, sw *p2p.Switch, - p2pLogger log.Logger, nodeKey *p2p.NodeKey) (pex.AddrBook, error) { - - addrBook := pex.NewAddrBook(config.P2P.AddrBookFile(), config.P2P.AddrBookStrict) - addrBook.SetLogger(p2pLogger.With("book", config.P2P.AddrBookFile())) - - // Add ourselves to addrbook to prevent dialing ourselves - if config.P2P.ExternalAddress != "" { - addr, err := p2p.NewNetAddressString(p2p.IDAddressString(nodeKey.ID(), config.P2P.ExternalAddress)) - if err != nil { - return nil, fmt.Errorf("p2p.external_address is incorrect: %w", err) - } - addrBook.AddOurAddress(addr) - } - if config.P2P.ListenAddress != "" { - addr, err := p2p.NewNetAddressString(p2p.IDAddressString(nodeKey.ID(), config.P2P.ListenAddress)) - if err != nil { - return nil, fmt.Errorf("p2p.laddr is incorrect: %w", err) - } - addrBook.AddOurAddress(addr) - } - - sw.SetAddrBook(addrBook) - - return addrBook, nil -} - -func createPEXReactorAndAddToSwitch(addrBook pex.AddrBook, config *cfg.Config, - sw *p2p.Switch, logger log.Logger) *pex.Reactor { - - // TODO persistent peers ? so we can have their DNS addrs saved - pexReactor := pex.NewReactor(addrBook, config.P2P.RecvAsync, - &pex.ReactorConfig{ - Seeds: splitAndTrimEmpty(config.P2P.Seeds, ",", " "), - SeedMode: config.P2P.SeedMode, - // See consensus/reactor.go: blocksToContributeToBecomeGoodPeer 10000 - // blocks assuming 10s blocks ~ 28 hours. - // TODO (melekes): make it dynamic based on the actual block latencies - // from the live network. - // https://github.com/tendermint/tendermint/issues/3523 - SeedDisconnectWaitPeriod: 28 * time.Hour, - PersistentPeersMaxDialPeriod: config.P2P.PersistentPeersMaxDialPeriod, - RecvBufSize: config.P2P.PexRecvBufSize, - }) - pexReactor.SetLogger(logger.With("module", "pex")) - sw.AddReactor("PEX", pexReactor) - return pexReactor -} - -// startStateSync starts an asynchronous state sync process, then switches to fast sync mode. -func startStateSync(ssR *statesync.Reactor, bcR fastSyncReactor, conR *cs.Reactor, - stateProvider statesync.StateProvider, config *cfg.StateSyncConfig, fastSync bool, - stateStore sm.Store, blockStore *store.BlockStore, state sm.State) error { - ssR.Logger.Info("Starting state sync") - - if stateProvider == nil { - var err error - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - stateProvider, err = statesync.NewLightClientStateProvider( - ctx, - state.ChainID, state.Version, state.InitialHeight, - config.RPCServers, light.TrustOptions{ - Period: config.TrustPeriod, - Height: config.TrustHeight, - Hash: config.TrustHashBytes(), - }, ssR.Logger.With("module", "light")) - if err != nil { - return fmt.Errorf("failed to set up light client state provider: %w", err) - } - } - - go func() { - state, previousState, commit, err := ssR.Sync(stateProvider, config.DiscoveryTime) - if err != nil { - ssR.Logger.Error("State sync failed", "err", err) - return - } - if previousState.LastBlockHeight > 0 { - err = stateStore.Bootstrap(previousState) - if err != nil { - ssR.Logger.Error("Failed to bootstrap node with previous state", "err", err) - return - } - } - err = stateStore.Bootstrap(state) - if err != nil { - ssR.Logger.Error("Failed to bootstrap node with new state", "err", err) - return - } - err = blockStore.SaveSeenCommit(state.LastBlockHeight, commit) - if err != nil { - ssR.Logger.Error("Failed to store last seen commit", "err", err) - return - } - - if fastSync { - // FIXME Very ugly to have these metrics bleed through here. - conR.Metrics.StateSyncing.Set(0) - conR.Metrics.FastSyncing.Set(1) - err = bcR.SwitchToFastSync(state) - if err != nil { - ssR.Logger.Error("Failed to switch to fast sync", "err", err) - return - } - } else { - conR.SwitchToConsensus(state, true) - } - }() - return nil -} - -// NewNode returns a new, ready to go, Ostracon Node. -func NewNode(config *cfg.Config, - privValidator types.PrivValidator, - nodeKey *p2p.NodeKey, - clientCreator proxy.ClientCreator, - genesisDocProvider GenesisDocProvider, - dbProvider DBProvider, - metricsProvider MetricsProvider, - logger log.Logger, - misbehaviors map[int64]cs.Misbehavior, - options ...Option) (*Node, error) { - - blockStore, stateDB, err := initDBs(config, dbProvider) - if err != nil { - return nil, err - } - - stateStore := sm.NewStore(stateDB) - - state, genDoc, err := LoadStateFromDBOrGenesisDocProvider(stateDB, genesisDocProvider) - if err != nil { - return nil, err - } - - // Create the proxyApp and establish connections to the ABCI app (consensus, mempool, query). - proxyApp, err := createAndStartProxyAppConns(clientCreator, logger) - if err != nil { - return nil, err - } - - // EventBus and IndexerService must be started before the handshake because - // we might need to index the txs of the replayed block as this might not have happened - // when the node stopped last time (i.e. the node stopped after it saved the block - // but before it indexed the txs, or, endblocker panicked) - eventBus, err := createAndStartEventBus(logger) - if err != nil { - return nil, err - } - - indexerService, txIndexer, blockIndexer, err := createAndStartIndexerService(config, dbProvider, eventBus, logger) - if err != nil { - return nil, err - } - - // If an address is provided, listen on the socket for a connection from an - // external signing process. - if config.PrivValidatorListenAddr != "" { - // FIXME: we should start services inside OnStart - privValidator, err = createAndStartPrivValidatorSocketClient(config.PrivValidatorListenAddr, genDoc.ChainID, logger) - if err != nil { - return nil, fmt.Errorf("error with private validator socket client: %w", err) - } - } - - pubKey, err := privValidator.GetPubKey() - if err != nil { - return nil, fmt.Errorf("can't get pubkey: %w", err) - } - - // Determine whether we should do state and/or fast sync. - // We don't fast-sync when the only validator is us. - fastSync := config.FastSyncMode && !onlyValidatorIsUs(state, pubKey) - stateSync := config.StateSync.Enable && !onlyValidatorIsUs(state, pubKey) - if stateSync && state.LastBlockHeight > 0 { - logger.Info("Found local state with non-zero height, skipping state sync") - stateSync = false - } - - // Create the handshaker, which calls RequestInfo, sets the AppVersion on the state, - // and replays any blocks as necessary to sync ostracon with the app. - consensusLogger := logger.With("module", "consensus") - if !stateSync { - if err := doHandshake(stateStore, state, blockStore, genDoc, eventBus, proxyApp, consensusLogger); err != nil { - return nil, err - } - - // Reload the state. It will have the Version.Consensus.App set by the - // Handshake, and may have other modifications as well (ie. depending on - // what happened during block replay). - state, err = stateStore.Load() - if err != nil { - return nil, fmt.Errorf("cannot load state: %w", err) - } - } - - logNodeStartupInfo(state, pubKey, logger, consensusLogger) - - csMetrics, p2pMetrics, memplMetrics, smMetrics := metricsProvider(genDoc.ChainID) - - // Make MempoolReactor - mempoolReactor, mempool := createMempoolAndMempoolReactor(config, proxyApp, state, memplMetrics, logger) - - // Make Evidence Reactor - evidenceReactor, evidencePool, err := createEvidenceReactor(config, dbProvider, stateDB, blockStore, logger) - if err != nil { - return nil, err - } - - // make block executor for consensus and blockchain reactors to execute blocks - blockExec := sm.NewBlockExecutor( - stateStore, - logger.With("module", "state"), - proxyApp.Consensus(), - mempool, - evidencePool, - sm.BlockExecutorWithMetrics(smMetrics), - ) - - // Make BlockchainReactor. Don't start fast sync if we're doing a state sync first. - bcReactor, err := createBlockchainReactor(config, state, blockExec, blockStore, fastSync && !stateSync, logger) - if err != nil { - return nil, fmt.Errorf("could not create blockchain reactor: %w", err) - } - - // Make ConsensusReactor. Don't enable fully if doing a state sync and/or fast sync first. - // FIXME We need to update metrics here, since other reactors don't have access to them. - if stateSync { - csMetrics.StateSyncing.Set(1) - } else if fastSync { - csMetrics.FastSyncing.Set(1) - } - - logger.Info("Setting up maverick consensus reactor", "Misbehaviors", misbehaviors) - consensusReactor, consensusState := createConsensusReactor( - config, state, blockExec, blockStore, mempool, evidencePool, - privValidator, csMetrics, stateSync || fastSync, eventBus, consensusLogger, misbehaviors) - - // Set up state sync reactor, and schedule a sync if requested. - // FIXME The way we do phased startups (e.g. replay -> fast sync -> consensus) is very messy, - // we should clean this whole thing up. See: - // https://github.com/tendermint/tendermint/issues/4644 - stateSyncReactor := statesync.NewReactor( - *config.StateSync, - proxyApp.Snapshot(), - proxyApp.Query(), - config.P2P.RecvAsync, - config.P2P.StatesyncRecvBufSize) - stateSyncReactor.SetLogger(logger.With("module", "statesync")) - - nodeInfo, err := makeNodeInfo(config, nodeKey, txIndexer, genDoc, state) - if err != nil { - return nil, err - } - - // Setup Transport. - transport, peerFilters := createTransport(config, nodeInfo, nodeKey, proxyApp) - - // Setup Switch. - p2pLogger := logger.With("module", "p2p") - sw := createSwitch( - config, transport, p2pMetrics, peerFilters, mempoolReactor, bcReactor, - stateSyncReactor, consensusReactor, evidenceReactor, nodeInfo, nodeKey, p2pLogger, - ) - - err = sw.AddPersistentPeers(splitAndTrimEmpty(config.P2P.PersistentPeers, ",", " ")) - if err != nil { - return nil, fmt.Errorf("could not add peers from persistent_peers field: %w", err) - } - - err = sw.AddUnconditionalPeerIDs(splitAndTrimEmpty(config.P2P.UnconditionalPeerIDs, ",", " ")) - if err != nil { - return nil, fmt.Errorf("could not add peer ids from unconditional_peer_ids field: %w", err) - } - - addrBook, err := createAddrBookAndSetOnSwitch(config, sw, p2pLogger, nodeKey) - if err != nil { - return nil, fmt.Errorf("could not create addrbook: %w", err) - } - - // Optionally, start the pex reactor - // - // TODO: - // - // We need to set Seeds and PersistentPeers on the switch, - // since it needs to be able to use these (and their DNS names) - // even if the PEX is off. We can include the DNS name in the NetAddress, - // but it would still be nice to have a clear list of the current "PersistentPeers" - // somewhere that we can return with net_info. - // - // If PEX is on, it should handle dialing the seeds. Otherwise the switch does it. - // Note we currently use the addrBook regardless at least for AddOurAddress - var pexReactor *pex.Reactor - if config.P2P.PexReactor { - pexReactor = createPEXReactorAndAddToSwitch(addrBook, config, sw, logger) - } - - if config.RPC.PprofListenAddress != "" { - go func() { - logger.Info("Starting pprof server", "laddr", config.RPC.PprofListenAddress) - logger.Error("pprof server error", "err", http.ListenAndServe(config.RPC.PprofListenAddress, nil)) - }() - } - - node := &Node{ - config: config, - genesisDoc: genDoc, - privValidator: privValidator, - - transport: transport, - sw: sw, - addrBook: addrBook, - nodeInfo: nodeInfo, - nodeKey: nodeKey, - - stateStore: stateStore, - blockStore: blockStore, - bcReactor: bcReactor, - mempoolReactor: mempoolReactor, - mempool: mempool, - consensusState: consensusState, - consensusReactor: consensusReactor, - stateSyncReactor: stateSyncReactor, - stateSync: stateSync, - stateSyncGenesis: state, // Shouldn't be necessary, but need a way to pass the genesis state - pexReactor: pexReactor, - evidencePool: evidencePool, - proxyApp: proxyApp, - txIndexer: txIndexer, - indexerService: indexerService, - blockIndexer: blockIndexer, - eventBus: eventBus, - } - node.BaseService = *service.NewBaseService(logger, "Node", node) - - for _, option := range options { - option(node) - } - - return node, nil -} - -// OnStart starts the Node. It implements service.Service. -func (n *Node) OnStart() error { - now := tmtime.Now() - genTime := n.genesisDoc.GenesisTime - if genTime.After(now) { - n.Logger.Info("Genesis time is in the future. Sleeping until then...", "genTime", genTime) - time.Sleep(genTime.Sub(now)) - } - - // Add private IDs to addrbook to block those peers being added - n.addrBook.AddPrivateIDs(splitAndTrimEmpty(n.config.P2P.PrivatePeerIDs, ",", " ")) - - // Start the RPC server before the P2P server - // so we can eg. receive txs for the first block - if n.config.RPC.ListenAddress != "" { - listeners, err := n.startRPC() - if err != nil { - return err - } - n.rpcListeners = listeners - } - - if n.config.Instrumentation.Prometheus && - n.config.Instrumentation.PrometheusListenAddr != "" { - n.prometheusSrv = n.startPrometheusServer(n.config.Instrumentation.PrometheusListenAddr) - } - - // Start the transport. - addr, err := p2p.NewNetAddressString(p2p.IDAddressString(n.nodeKey.ID(), n.config.P2P.ListenAddress)) - if err != nil { - return err - } - if err := n.transport.Listen(*addr); err != nil { - return err - } - - n.isListening = true - - if n.config.Mempool.WalEnabled() { - err = n.mempool.InitWAL() - if err != nil { - return fmt.Errorf("init mempool WAL: %w", err) - } - } - - // Start the switch (the P2P server). - err = n.sw.Start() - if err != nil { - return err - } - - // Always connect to persistent peers - err = n.sw.DialPeersAsync(splitAndTrimEmpty(n.config.P2P.PersistentPeers, ",", " ")) - if err != nil { - return fmt.Errorf("could not dial peers from persistent_peers field: %w", err) - } - - // Run state sync - if n.stateSync { - bcR, ok := n.bcReactor.(fastSyncReactor) - if !ok { - return fmt.Errorf("this blockchain reactor does not support switching from state sync") - } - err := startStateSync(n.stateSyncReactor, bcR, n.consensusReactor, n.stateSyncProvider, - n.config.StateSync, n.config.FastSyncMode, n.stateStore, n.blockStore, n.stateSyncGenesis) - if err != nil { - return fmt.Errorf("failed to start state sync: %w", err) - } - } - - return nil -} - -// OnStop stops the Node. It implements service.Service. -func (n *Node) OnStop() { - n.BaseService.OnStop() - - n.Logger.Info("Stopping Node") - - // first stop the non-reactor services - if err := n.eventBus.Stop(); err != nil { - n.Logger.Error("Error closing eventBus", "err", err) - } - if err := n.indexerService.Stop(); err != nil { - n.Logger.Error("Error closing indexerService", "err", err) - } - - // now stop the reactors - if err := n.sw.Stop(); err != nil { - n.Logger.Error("Error closing switch", "err", err) - } - - // stop mempool WAL - if n.config.Mempool.WalEnabled() { - n.mempool.CloseWAL() - } - - if err := n.transport.Close(); err != nil { - n.Logger.Error("Error closing transport", "err", err) - } - - n.isListening = false - - // finally stop the listeners / external services - for _, l := range n.rpcListeners { - n.Logger.Info("Closing rpc listener", "listener", l) - if err := l.Close(); err != nil { - n.Logger.Error("Error closing listener", "listener", l, "err", err) - } - } - - if pvsc, ok := n.privValidator.(service.Service); ok { - if err := pvsc.Stop(); err != nil { - n.Logger.Error("Error closing private validator", "err", err) - } - } - - if n.prometheusSrv != nil { - if err := n.prometheusSrv.Shutdown(context.Background()); err != nil { - // Error from closing listeners, or context timeout: - n.Logger.Error("Prometheus HTTP server Shutdown", "err", err) - } - } -} - -// ConfigureRPC makes sure RPC has all the objects it needs to operate. -func (n *Node) ConfigureRPC() error { - pubKey, err := n.privValidator.GetPubKey() - if err != nil { - return fmt.Errorf("can't get pubkey: %w", err) - } - rpccore.SetEnvironment(&rpccore.Environment{ - ProxyAppQuery: n.proxyApp.Query(), - ProxyAppMempool: n.proxyApp.Mempool(), - - StateStore: n.stateStore, - BlockStore: n.blockStore, - EvidencePool: n.evidencePool, - ConsensusState: n.consensusState, - P2PPeers: n.sw, - P2PTransport: n, - - PubKey: pubKey, - GenDoc: n.genesisDoc, - TxIndexer: n.txIndexer, - BlockIndexer: n.blockIndexer, - ConsensusReactor: &consensus.Reactor{}, - EventBus: n.eventBus, - Mempool: n.mempool, - - Logger: n.Logger.With("module", "rpc"), - - Config: *n.config.RPC, - }) - return nil -} - -func (n *Node) startRPC() ([]net.Listener, error) { - err := n.ConfigureRPC() - if err != nil { - return nil, err - } - - listenAddrs := splitAndTrimEmpty(n.config.RPC.ListenAddress, ",", " ") - - if n.config.RPC.Unsafe { - rpccore.AddUnsafeRoutes() - } - - config := rpcserver.DefaultConfig() - config.MaxBodyBytes = n.config.RPC.MaxBodyBytes - config.MaxHeaderBytes = n.config.RPC.MaxHeaderBytes - config.MaxOpenConnections = n.config.RPC.MaxOpenConnections - // If necessary adjust global WriteTimeout to ensure it's greater than - // TimeoutBroadcastTxCommit. - // See https://github.com/tendermint/tendermint/issues/3435 - if config.WriteTimeout <= n.config.RPC.TimeoutBroadcastTxCommit { - config.WriteTimeout = n.config.RPC.TimeoutBroadcastTxCommit + 1*time.Second - } - - // we may expose the rpc over both a unix and tcp socket - listeners := make([]net.Listener, len(listenAddrs)) - for i, listenAddr := range listenAddrs { - mux := http.NewServeMux() - rpcLogger := n.Logger.With("module", "rpc-server") - wmLogger := rpcLogger.With("protocol", "websocket") - wm := rpcserver.NewWebsocketManager(rpccore.Routes, - rpcserver.OnDisconnect(func(remoteAddr string) { - err := n.eventBus.UnsubscribeAll(context.Background(), remoteAddr) - if err != nil && err != tmpubsub.ErrSubscriptionNotFound { - wmLogger.Error("Failed to unsubscribe addr from events", "addr", remoteAddr, "err", err) - } - }), - rpcserver.ReadLimit(config.MaxBodyBytes), - ) - wm.SetLogger(wmLogger) - mux.HandleFunc("/websocket", wm.WebsocketHandler) - rpcserver.RegisterRPCFuncs(mux, rpccore.Routes, rpcLogger) - listener, err := rpcserver.Listen( - listenAddr, - config, - ) - if err != nil { - return nil, err - } - - var rootHandler http.Handler = mux - if n.config.RPC.IsCorsEnabled() { - corsMiddleware := cors.New(cors.Options{ - AllowedOrigins: n.config.RPC.CORSAllowedOrigins, - AllowedMethods: n.config.RPC.CORSAllowedMethods, - AllowedHeaders: n.config.RPC.CORSAllowedHeaders, - }) - rootHandler = corsMiddleware.Handler(mux) - } - if n.config.RPC.IsTLSEnabled() { - go func() { - if err := rpcserver.ServeTLS( - listener, - rootHandler, - n.config.RPC.CertFile(), - n.config.RPC.KeyFile(), - rpcLogger, - config, - ); err != nil { - n.Logger.Error("Error serving server with TLS", "err", err) - } - }() - } else { - go func() { - if err := rpcserver.Serve( - listener, - rootHandler, - rpcLogger, - config, - ); err != nil { - n.Logger.Error("Error serving server", "err", err) - } - }() - } - - listeners[i] = listener - } - - // we expose a simplified api over grpc for convenience to app devs - grpcListenAddr := n.config.RPC.GRPCListenAddress - if grpcListenAddr != "" { - config := rpcserver.DefaultConfig() - config.MaxBodyBytes = n.config.RPC.MaxBodyBytes - config.MaxHeaderBytes = n.config.RPC.MaxHeaderBytes - // NOTE: GRPCMaxOpenConnections is used, not MaxOpenConnections - config.MaxOpenConnections = n.config.RPC.GRPCMaxOpenConnections - // If necessary adjust global WriteTimeout to ensure it's greater than - // TimeoutBroadcastTxCommit. - // See https://github.com/tendermint/tendermint/issues/3435 - if config.WriteTimeout <= n.config.RPC.TimeoutBroadcastTxCommit { - config.WriteTimeout = n.config.RPC.TimeoutBroadcastTxCommit + 1*time.Second - } - listener, err := rpcserver.Listen(grpcListenAddr, config) - if err != nil { - return nil, err - } - go func() { - if err := grpccore.StartGRPCServer(listener); err != nil { - n.Logger.Error("Error starting gRPC server", "err", err) - } - }() - listeners = append(listeners, listener) - } - - return listeners, nil -} - -// startPrometheusServer starts a Prometheus HTTP server, listening for metrics -// collectors on addr. -func (n *Node) startPrometheusServer(addr string) *http.Server { - srv := &http.Server{ - Addr: addr, - Handler: promhttp.InstrumentMetricHandler( - prometheus.DefaultRegisterer, promhttp.HandlerFor( - prometheus.DefaultGatherer, - promhttp.HandlerOpts{MaxRequestsInFlight: n.config.Instrumentation.MaxOpenConnections}, - ), - ), - } - go func() { - if err := srv.ListenAndServe(); err != http.ErrServerClosed { - // Error starting or closing listener: - n.Logger.Error("Prometheus HTTP server ListenAndServe", "err", err) - } - }() - return srv -} - -// Switch returns the Node's Switch. -func (n *Node) Switch() *p2p.Switch { - return n.sw -} - -// BlockStore returns the Node's BlockStore. -func (n *Node) BlockStore() *store.BlockStore { - return n.blockStore -} - -// ConsensusState returns the Node's ConsensusState. -func (n *Node) ConsensusState() *cs.State { - return n.consensusState -} - -// ConsensusReactor returns the Node's ConsensusReactor. -func (n *Node) ConsensusReactor() *cs.Reactor { - return n.consensusReactor -} - -// MempoolReactor returns the Node's mempool reactor. -func (n *Node) MempoolReactor() *mempl.Reactor { - return n.mempoolReactor -} - -// Mempool returns the Node's mempool. -func (n *Node) Mempool() mempl.Mempool { - return n.mempool -} - -// PEXReactor returns the Node's PEXReactor. It returns nil if PEX is disabled. -func (n *Node) PEXReactor() *pex.Reactor { - return n.pexReactor -} - -// EvidencePool returns the Node's EvidencePool. -func (n *Node) EvidencePool() *evidence.Pool { - return n.evidencePool -} - -// EventBus returns the Node's EventBus. -func (n *Node) EventBus() *types.EventBus { - return n.eventBus -} - -// PrivValidator returns the Node's PrivValidator. -// XXX: for convenience only! -func (n *Node) PrivValidator() types.PrivValidator { - return n.privValidator -} - -// GenesisDoc returns the Node's GenesisDoc. -func (n *Node) GenesisDoc() *types.GenesisDoc { - return n.genesisDoc -} - -// ProxyApp returns the Node's AppConns, representing its connections to the ABCI application. -func (n *Node) ProxyApp() proxy.AppConns { - return n.proxyApp -} - -// Config returns the Node's config. -func (n *Node) Config() *cfg.Config { - return n.config -} - -//------------------------------------------------------------------------------ - -func (n *Node) Listeners() []string { - return []string{ - fmt.Sprintf("Listener(@%v)", n.config.P2P.ExternalAddress), - } -} - -func (n *Node) IsListening() bool { - return n.isListening -} - -// NodeInfo returns the Node's Info from the Switch. -func (n *Node) NodeInfo() p2p.NodeInfo { - return n.nodeInfo -} - -func makeNodeInfo( - config *cfg.Config, - nodeKey *p2p.NodeKey, - txIndexer txindex.TxIndexer, - genDoc *types.GenesisDoc, - state sm.State, -) (p2p.NodeInfo, error) { - txIndexerStatus := "on" - if _, ok := txIndexer.(*null.TxIndex); ok { - txIndexerStatus = "off" - } - - var bcChannel byte - switch config.FastSync.Version { - case "v0": - bcChannel = bcv0.BlockchainChannel - case "v1": - bcChannel = bcv1.BlockchainChannel - case "v2": - bcChannel = bcv2.BlockchainChannel - default: - return nil, fmt.Errorf("unknown fastsync version %s", config.FastSync.Version) - } - - nodeInfo := p2p.DefaultNodeInfo{ - ProtocolVersion: p2p.NewProtocolVersion( - version.P2PProtocol, // global - state.Version.Consensus.Block, - state.Version.Consensus.App, - ), - DefaultNodeID: nodeKey.ID(), - Network: genDoc.ChainID, - Version: version.OCCoreSemVer, - Channels: []byte{ - bcChannel, - cs.StateChannel, cs.DataChannel, cs.VoteChannel, cs.VoteSetBitsChannel, - mempl.MempoolChannel, - evidence.EvidenceChannel, - statesync.SnapshotChannel, statesync.ChunkChannel, - }, - Moniker: config.Moniker, - Other: p2p.DefaultNodeInfoOther{ - TxIndex: txIndexerStatus, - RPCAddress: config.RPC.ListenAddress, - }, - } - - if config.P2P.PexReactor { - nodeInfo.Channels = append(nodeInfo.Channels, pex.PexChannel) - } - - lAddr := config.P2P.ExternalAddress - - if lAddr == "" { - lAddr = config.P2P.ListenAddress - } - - nodeInfo.ListenAddr = lAddr - - err := nodeInfo.Validate() - return nodeInfo, err -} - -//------------------------------------------------------------------------------ - -var ( - genesisDocKey = []byte("genesisDoc") -) - -// LoadStateFromDBOrGenesisDocProvider attempts to load the state from the -// database, or creates one using the given genesisDocProvider and persists the -// result to the database. On success this also returns the genesis doc loaded -// through the given provider. -func LoadStateFromDBOrGenesisDocProvider( - stateDB dbm.DB, - genesisDocProvider GenesisDocProvider, -) (sm.State, *types.GenesisDoc, error) { - // Get genesis doc - genDoc, err := loadGenesisDoc(stateDB) - if err != nil { - genDoc, err = genesisDocProvider() - if err != nil { - return sm.State{}, nil, err - } - // save genesis doc to prevent a certain class of user errors (e.g. when it - // was changed, accidentally or not). Also good for audit trail. - saveGenesisDoc(stateDB, genDoc) - } - stateStore := sm.NewStore(stateDB) - state, err := stateStore.LoadFromDBOrGenesisDoc(genDoc) - if err != nil { - return sm.State{}, nil, err - } - return state, genDoc, nil -} - -// panics if failed to unmarshal bytes -func loadGenesisDoc(db dbm.DB) (*types.GenesisDoc, error) { - b, err := db.Get(genesisDocKey) - if err != nil { - panic(err) - } - if len(b) == 0 { - return nil, errors.New("genesis doc not found") - } - var genDoc *types.GenesisDoc - err = tmjson.Unmarshal(b, &genDoc) - if err != nil { - panic(fmt.Sprintf("Failed to load genesis doc due to unmarshaling error: %v (bytes: %X)", err, b)) - } - return genDoc, nil -} - -// panics if failed to marshal the given genesis document -func saveGenesisDoc(db dbm.DB, genDoc *types.GenesisDoc) { - b, err := tmjson.Marshal(genDoc) - if err != nil { - panic(fmt.Sprintf("Failed to save genesis doc due to marshaling error: %v", err)) - } - if err := db.SetSync(genesisDocKey, b); err != nil { - panic(fmt.Sprintf("Failed to save genesis doc: %v", err)) - } -} - -func createAndStartPrivValidatorSocketClient( - listenAddr, - chainID string, - logger log.Logger, -) (types.PrivValidator, error) { - pve, err := privval.NewSignerListener(listenAddr, logger) - if err != nil { - return nil, fmt.Errorf("failed to start private validator: %w", err) - } - - pvsc, err := privval.NewSignerClient(pve, chainID) - if err != nil { - return nil, fmt.Errorf("failed to start private validator: %w", err) - } - - // try to get a pubkey from private validate first time - _, err = pvsc.GetPubKey() - if err != nil { - return nil, fmt.Errorf("can't get pubkey: %w", err) - } - - const ( - retries = 50 // 50 * 100ms = 5s total - timeout = 100 * time.Millisecond - ) - pvscWithRetries := privval.NewRetrySignerClient(pvsc, retries, timeout) - - return pvscWithRetries, nil -} - -// splitAndTrimEmpty slices s into all subslices separated by sep and returns a -// slice of the string s with all leading and trailing Unicode code points -// contained in cutset removed. If sep is empty, SplitAndTrim splits after each -// UTF-8 sequence. First part is equivalent to strings.SplitN with a count of -// -1. also filter out empty strings, only return non-empty strings. -func splitAndTrimEmpty(s, sep, cutset string) []string { - if s == "" { - return []string{} - } - - spl := strings.Split(s, sep) - nonEmptyStrings := make([]string, 0, len(spl)) - for i := 0; i < len(spl); i++ { - element := strings.Trim(spl[i], cutset) - if element != "" { - nonEmptyStrings = append(nonEmptyStrings, element) - } - } - return nonEmptyStrings -} diff --git a/test/maverick/node/privval.go b/test/maverick/node/privval.go deleted file mode 100644 index 9568ff986..000000000 --- a/test/maverick/node/privval.go +++ /dev/null @@ -1,385 +0,0 @@ -package node - -import ( - "errors" - "fmt" - "io/ioutil" - "strings" - - "github.com/line/ostracon/crypto" - "github.com/line/ostracon/crypto/bls" - "github.com/line/ostracon/crypto/composite" - "github.com/line/ostracon/crypto/ed25519" - tmbytes "github.com/line/ostracon/libs/bytes" - tmjson "github.com/line/ostracon/libs/json" - tmos "github.com/line/ostracon/libs/os" - "github.com/line/ostracon/libs/tempfile" - tmproto "github.com/line/ostracon/proto/ostracon/types" - "github.com/line/ostracon/types" -) - -// ******************************************************************************************************************* -// -// WARNING: FOR TESTING ONLY. DO NOT USE THIS FILE OUTSIDE MAVERICK -// -// ******************************************************************************************************************* - -const ( - stepNone int8 = 0 // Used to distinguish the initial state - stepPropose int8 = 1 - stepPrevote int8 = 2 - stepPrecommit int8 = 3 -) - -const ( - PrivKeyTypeEd25519 string = "ed25519" - PrivKeyTypeComposite string = "composite" -) - -// A vote is either stepPrevote or stepPrecommit. -func voteToStep(vote *tmproto.Vote) int8 { - switch vote.Type { - case tmproto.PrevoteType: - return stepPrevote - case tmproto.PrecommitType: - return stepPrecommit - default: - panic(fmt.Sprintf("Unknown vote type: %v", vote.Type)) - } -} - -//------------------------------------------------------------------------------- - -// FilePVKey stores the immutable part of PrivValidator. -type FilePVKey struct { - Address types.Address `json:"address"` - PubKey crypto.PubKey `json:"pub_key"` - PrivKey crypto.PrivKey `json:"priv_key"` - - filePath string -} - -// Save persists the FilePVKey to its filePath. -func (pvKey FilePVKey) Save() { - outFile := pvKey.filePath - if outFile == "" { - panic("cannot save PrivValidator key: filePath not set") - } - - jsonBytes, err := tmjson.MarshalIndent(pvKey, "", " ") - if err != nil { - panic(err) - } - err = tempfile.WriteFileAtomic(outFile, jsonBytes, 0600) - if err != nil { - panic(err) - } - -} - -//------------------------------------------------------------------------------- - -// FilePVLastSignState stores the mutable part of PrivValidator. -type FilePVLastSignState struct { - Height int64 `json:"height"` - Round int32 `json:"round"` - Step int8 `json:"step"` - Signature []byte `json:"signature,omitempty"` - SignBytes tmbytes.HexBytes `json:"signbytes,omitempty"` - - filePath string -} - -// CheckHRS checks the given height, round, step (HRS) against that of the -// FilePVLastSignState. It returns an error if the arguments constitute a regression, -// or if they match but the SignBytes are empty. -// The returned boolean indicates whether the last Signature should be reused - -// it returns true if the HRS matches the arguments and the SignBytes are not empty (indicating -// we have already signed for this HRS, and can reuse the existing signature). -// It panics if the HRS matches the arguments, there's a SignBytes, but no Signature. -func (lss *FilePVLastSignState) CheckHRS(height int64, round int32, step int8) (bool, error) { - - if lss.Height > height { - return false, fmt.Errorf("height regression. Got %v, last height %v", height, lss.Height) - } - - if lss.Height == height { - if lss.Round > round { - return false, fmt.Errorf("round regression at height %v. Got %v, last round %v", height, round, lss.Round) - } - - if lss.Round == round { - if lss.Step > step { - return false, fmt.Errorf( - "step regression at height %v round %v. Got %v, last step %v", - height, - round, - step, - lss.Step, - ) - } else if lss.Step == step { - if lss.SignBytes != nil { - if lss.Signature == nil { - panic("pv: Signature is nil but SignBytes is not!") - } - return true, nil - } - return false, errors.New("no SignBytes found") - } - } - } - return false, nil -} - -// Save persists the FilePvLastSignState to its filePath. -func (lss *FilePVLastSignState) Save() { - outFile := lss.filePath - if outFile == "" { - panic("cannot save FilePVLastSignState: filePath not set") - } - jsonBytes, err := tmjson.MarshalIndent(lss, "", " ") - if err != nil { - panic(err) - } - err = tempfile.WriteFileAtomic(outFile, jsonBytes, 0600) - if err != nil { - panic(err) - } -} - -//------------------------------------------------------------------------------- - -// FilePV implements PrivValidator using data persisted to disk -// to prevent double signing. -// NOTE: the directories containing pv.Key.filePath and pv.LastSignState.filePath must already exist. -// It includes the LastSignature and LastSignBytes so we don't lose the signature -// if the process crashes after signing but before the resulting consensus message is processed. -type FilePV struct { - Key FilePVKey - LastSignState FilePVLastSignState -} - -// NewFilePV generates a new validator from the given key and paths. -func NewFilePV(privKey crypto.PrivKey, keyFilePath, stateFilePath string) *FilePV { - return &FilePV{ - Key: FilePVKey{ - Address: privKey.PubKey().Address(), - PubKey: privKey.PubKey(), - PrivKey: privKey, - filePath: keyFilePath, - }, - LastSignState: FilePVLastSignState{ - Step: stepNone, - filePath: stateFilePath, - }, - } -} - -// GenFilePV generates a new validator with randomly generated private key -// and sets the filePaths, but does not call Save(). -func GenFilePV(keyFilePath, stateFilePath, privKeyType string) (filePV *FilePV, err error) { - var privKey crypto.PrivKey - switch strings.ToLower(privKeyType) { - case PrivKeyTypeEd25519: - privKey = ed25519.GenPrivKey() - case PrivKeyTypeComposite: - privKey = composite.NewPrivKeyComposite(bls.GenPrivKey(), ed25519.GenPrivKey()) - default: - return nil, fmt.Errorf("undefined private key type: %s", privKeyType) - } - return NewFilePV(privKey, keyFilePath, stateFilePath), nil -} - -// LoadFilePV loads a FilePV from the filePaths. The FilePV handles double -// signing prevention by persisting data to the stateFilePath. If either file path -// does not exist, the program will exit. -func LoadFilePV(keyFilePath, stateFilePath string) *FilePV { - return loadFilePV(keyFilePath, stateFilePath, true) -} - -// LoadFilePVEmptyState loads a FilePV from the given keyFilePath, with an empty LastSignState. -// If the keyFilePath does not exist, the program will exit. -func LoadFilePVEmptyState(keyFilePath, stateFilePath string) *FilePV { - return loadFilePV(keyFilePath, stateFilePath, false) -} - -// If loadState is true, we load from the stateFilePath. Otherwise, we use an empty LastSignState. -func loadFilePV(keyFilePath, stateFilePath string, loadState bool) *FilePV { - keyJSONBytes, err := ioutil.ReadFile(keyFilePath) - if err != nil { - tmos.Exit(err.Error()) - } - pvKey := FilePVKey{} - err = tmjson.Unmarshal(keyJSONBytes, &pvKey) - if err != nil { - tmos.Exit(fmt.Sprintf("Error reading PrivValidator key from %v: %v\n", keyFilePath, err)) - } - - // overwrite pubkey and address for convenience - pvKey.PubKey = pvKey.PrivKey.PubKey() - pvKey.Address = pvKey.PubKey.Address() - pvKey.filePath = keyFilePath - - pvState := FilePVLastSignState{} - - if loadState { - stateJSONBytes, err := ioutil.ReadFile(stateFilePath) - if err != nil { - tmos.Exit(err.Error()) - } - err = tmjson.Unmarshal(stateJSONBytes, &pvState) - if err != nil { - tmos.Exit(fmt.Sprintf("Error reading PrivValidator state from %v: %v\n", stateFilePath, err)) - } - } - - pvState.filePath = stateFilePath - - return &FilePV{ - Key: pvKey, - LastSignState: pvState, - } -} - -// LoadOrGenFilePV loads a FilePV from the given filePaths -// or else generates a new one and saves it to the filePaths. -func LoadOrGenFilePV(keyFilePath, stateFilePath, privKeyType string) (pv *FilePV, err error) { - if tmos.FileExists(keyFilePath) { - pv = LoadFilePV(keyFilePath, stateFilePath) - err = nil - } else { - pv, err = GenFilePV(keyFilePath, stateFilePath, privKeyType) - if pv != nil { - pv.Save() - } - } - return pv, err -} - -// GetAddress returns the address of the validator. -// Implements PrivValidator. -func (pv *FilePV) GetAddress() types.Address { - return pv.Key.Address -} - -// GetPubKey returns the public key of the validator. -// Implements PrivValidator. -func (pv *FilePV) GetPubKey() (crypto.PubKey, error) { - return pv.Key.PubKey, nil -} - -// SignVote signs a canonical representation of the vote, along with the -// chainID. Implements PrivValidator. -func (pv *FilePV) SignVote(chainID string, vote *tmproto.Vote) error { - if err := pv.signVote(chainID, vote); err != nil { - return fmt.Errorf("error signing vote: %v", err) - } - return nil -} - -// SignProposal signs a canonical representation of the proposal, along with -// the chainID. Implements PrivValidator. -func (pv *FilePV) SignProposal(chainID string, proposal *tmproto.Proposal) error { - if err := pv.signProposal(chainID, proposal); err != nil { - return fmt.Errorf("error signing proposal: %v", err) - } - return nil -} - -// GenerateVRFProof generates a proof for specified message. -func (pv *FilePV) GenerateVRFProof(message []byte) (crypto.Proof, error) { - return pv.Key.PrivKey.VRFProve(message) -} - -// Save persists the FilePV to disk. -func (pv *FilePV) Save() { - pv.Key.Save() - pv.LastSignState.Save() -} - -// Reset resets all fields in the FilePV. -// NOTE: Unsafe! -func (pv *FilePV) Reset() { - var sig []byte - pv.LastSignState.Height = 0 - pv.LastSignState.Round = 0 - pv.LastSignState.Step = 0 - pv.LastSignState.Signature = sig - pv.LastSignState.SignBytes = nil - pv.Save() -} - -// String returns a string representation of the FilePV. -func (pv *FilePV) String() string { - return fmt.Sprintf( - "PrivValidator{%v LH:%v, LR:%v, LS:%v}", - pv.GetAddress(), - pv.LastSignState.Height, - pv.LastSignState.Round, - pv.LastSignState.Step, - ) -} - -//------------------------------------------------------------------------------------ - -// signVote checks if the vote is good to sign and sets the vote signature. -// It may need to set the timestamp as well if the vote is otherwise the same as -// a previously signed vote (ie. we crashed after signing but before the vote hit the WAL). -func (pv *FilePV) signVote(chainID string, vote *tmproto.Vote) error { - height, round, step := vote.Height, vote.Round, voteToStep(vote) - - lss := pv.LastSignState - - _, err := lss.CheckHRS(height, round, step) - if err != nil { - return err - } - - signBytes := types.VoteSignBytes(chainID, vote) - - // It passed the checks. Sign the vote - sig, err := pv.Key.PrivKey.Sign(signBytes) - if err != nil { - return err - } - pv.saveSigned(height, round, step, signBytes, sig) - vote.Signature = sig - return nil -} - -// signProposal checks if the proposal is good to sign and sets the proposal signature. -// It may need to set the timestamp as well if the proposal is otherwise the same as -// a previously signed proposal ie. we crashed after signing but before the proposal hit the WAL). -func (pv *FilePV) signProposal(chainID string, proposal *tmproto.Proposal) error { - height, round, step := proposal.Height, proposal.Round, stepPropose - - lss := pv.LastSignState - - _, err := lss.CheckHRS(height, round, step) - if err != nil { - return err - } - - signBytes := types.ProposalSignBytes(chainID, proposal) - - // It passed the checks. Sign the proposal - sig, err := pv.Key.PrivKey.Sign(signBytes) - if err != nil { - return err - } - pv.saveSigned(height, round, step, signBytes, sig) - proposal.Signature = sig - return nil -} - -// Persist height/round/step and signature -func (pv *FilePV) saveSigned(height int64, round int32, step int8, - signBytes []byte, sig []byte) { - - pv.LastSignState.Height = height - pv.LastSignState.Round = round - pv.LastSignState.Step = step - pv.LastSignState.Signature = sig - pv.LastSignState.SignBytes = signBytes - pv.LastSignState.Save() -} From a4154f5d2a434dd182e6ed6790754ca04fe8b081 Mon Sep 17 00:00:00 2001 From: tnasu Date: Wed, 21 Dec 2022 20:33:58 +0900 Subject: [PATCH 2/6] Fix the usage of version --- .github/workflows/e2e.yml | 2 +- test/e2e/Makefile | 7 ++++++- test/e2e/runner/cleanup.go | 6 +++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml index 29f96ba88..04d8aa2a2 100644 --- a/.github/workflows/e2e.yml +++ b/.github/workflows/e2e.yml @@ -9,7 +9,7 @@ on: - release/** env: - TAG: ostracon/e2e-node + TAG: ostracon/e2e-node:local-version # See test/e2e/Makefile:docker CACHE_DIR: /tmp/ostracon/e2etest jobs: diff --git a/test/e2e/Makefile b/test/e2e/Makefile index 86ce05f8c..900028d5a 100644 --- a/test/e2e/Makefile +++ b/test/e2e/Makefile @@ -1,7 +1,12 @@ all: docker generator runner docker: - docker build --progress=plain --tag ostracon/e2e-node --tag ostracon/e2e-node:local-version -f docker/Dockerfile ../.. + # See the default version on test/e2e/pkg/testnet.go:LoadTestnet + # Relate the "docker-compose.yml" on test/e2e/pkg/infra/docker/docker.go:dockerComposeBytes + # Relate the "cleanup" on test/e2e/runner/cleanup.go:cleanupDir + docker build --progress=plain \ + --tag ostracon/e2e-node:local-version \ + -f docker/Dockerfile ../.. # We need to build support for database backends into the app in # order to build a binary with an Ostracon node in it (for built-in diff --git a/test/e2e/runner/cleanup.go b/test/e2e/runner/cleanup.go index 39a05a4e6..2865af79e 100644 --- a/test/e2e/runner/cleanup.go +++ b/test/e2e/runner/cleanup.go @@ -16,7 +16,7 @@ func Cleanup(testnet *e2e.Testnet) error { if err != nil { return err } - err = cleanupDir(testnet.Dir) + err = cleanupDir(testnet.Dir, testnet.Nodes[0].Version) if err != nil { return err } @@ -48,7 +48,7 @@ func cleanupDocker() error { } // cleanupDir cleans up a testnet directory -func cleanupDir(dir string) error { +func cleanupDir(dir, version string) error { if dir == "" { return errors.New("no directory set") } @@ -70,7 +70,7 @@ func cleanupDir(dir string) error { return err } err = execDocker("run", "--rm", "--entrypoint", "", "-v", fmt.Sprintf("%v:/network", absDir), - "ostracon/e2e-node", "sh", "-c", "rm -rf /network/*/") + fmt.Sprintf("ostracon/e2e-node:%s", version), "sh", "-c", "rm -rf /network/*/") if err != nil { return err } From 726c88c10184b53a47e792a5967cb9fd0886c470 Mon Sep 17 00:00:00 2001 From: tnasu Date: Wed, 21 Dec 2022 20:34:31 +0900 Subject: [PATCH 3/6] fix code coverage --- libs/log/lazy_test.go | 29 ++++++++++++++++ test/e2e/generator/generate_test.go | 30 +++++++++++++++++ test/e2e/generator/main_test.go | 52 +++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) create mode 100644 libs/log/lazy_test.go create mode 100644 test/e2e/generator/generate_test.go create mode 100644 test/e2e/generator/main_test.go diff --git a/libs/log/lazy_test.go b/libs/log/lazy_test.go new file mode 100644 index 000000000..296280b72 --- /dev/null +++ b/libs/log/lazy_test.go @@ -0,0 +1,29 @@ +package log + +import ( + tmbytes "github.com/line/ostracon/libs/bytes" + "github.com/stretchr/testify/require" + "testing" +) + +func TestNewLazySprintf(t *testing.T) { + format := "echo:%s" + args := make([]interface{}, 0, 1) + args = append(args, "hello") + expected := LazySprintf{format: format, args: args} + actual := NewLazySprintf(format, args...) + require.Equal(t, expected.String(), actual.String()) +} + +func TestNewLazyBlockHash(t *testing.T) { + block := testHashable{} + expected := LazyBlockHash{block: block} + actual := NewLazyBlockHash(block) + require.Equal(t, expected.String(), actual.String()) +} + +type testHashable struct{} + +func (testHashable) Hash() tmbytes.HexBytes { + return []byte{0} +} diff --git a/test/e2e/generator/generate_test.go b/test/e2e/generator/generate_test.go new file mode 100644 index 000000000..b87e1dcfe --- /dev/null +++ b/test/e2e/generator/generate_test.go @@ -0,0 +1,30 @@ +package main + +import ( + "github.com/stretchr/testify/require" + "math/rand" + "testing" +) + +func TestGenerate(t *testing.T) { + testcases := []struct { + name string + version string + }{ + { + name: "empty version", + version: "", + }, + { + name: "specify version", + version: "2", + }, + } + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + manifests, err := Generate(rand.New(rand.NewSource(randomSeed)), tc.version) + require.NoError(t, err) + require.NotNil(t, manifests) + }) + } +} diff --git a/test/e2e/generator/main_test.go b/test/e2e/generator/main_test.go new file mode 100644 index 000000000..c645c2af0 --- /dev/null +++ b/test/e2e/generator/main_test.go @@ -0,0 +1,52 @@ +package main + +import ( + "github.com/stretchr/testify/require" + "io/ioutil" + "os" + "testing" +) + +func TestNewCLI(t *testing.T) { + tempDir, err := ioutil.TempDir("", "runner") + require.NoError(t, err) + defer os.RemoveAll(tempDir) //nolint:staticcheck + cmd := NewCLI() + testcases := []struct { + name string + wantErr bool + args []string + }{ + { + name: "default", + wantErr: true, + args: []string{ + "-d", tempDir, + }, + }, + { + name: "specify groups", + wantErr: true, + args: []string{ + "-d", tempDir, + "-g", "1", + }, + }, + { + name: "specify version", + wantErr: true, + args: []string{ + "-d", tempDir, + "-m", "1", + }, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + err := cmd.root.ParseFlags(tc.args) + require.NoError(t, err) + cmd.Run() + }) + } +} From 427ca75af8ea90d3ced7875c1f40dd4fef7accc6 Mon Sep 17 00:00:00 2001 From: Toshimasa Nasu Date: Thu, 22 Dec 2022 12:40:17 +0900 Subject: [PATCH 4/6] Update libs/log/ocfmt_logger_test.go --- libs/log/ocfmt_logger_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/log/ocfmt_logger_test.go b/libs/log/ocfmt_logger_test.go index b954fe084..f9a941ad1 100644 --- a/libs/log/ocfmt_logger_test.go +++ b/libs/log/ocfmt_logger_test.go @@ -61,7 +61,7 @@ func TestOCFmtLogger(t *testing.T) { assert.Regexp(t, regexp.MustCompile(`N\[.+\] unknown \s+ hash=74657374206D65\n$`), buf.String()) } -func BenchmarkTMFmtLoggerSimple(b *testing.B) { +func BenchmarkOCFmtLoggerSimple(b *testing.B) { benchmarkRunnerKitlog(b, log.NewOCFmtLogger(io.Discard), baseMessage) } From feacff7ea81e97eb3de0c908991b7129a20e3218 Mon Sep 17 00:00:00 2001 From: Toshimasa Nasu Date: Thu, 22 Dec 2022 12:40:25 +0900 Subject: [PATCH 5/6] Update libs/log/ocfmt_logger_test.go --- libs/log/ocfmt_logger_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/log/ocfmt_logger_test.go b/libs/log/ocfmt_logger_test.go index f9a941ad1..17e7a57b1 100644 --- a/libs/log/ocfmt_logger_test.go +++ b/libs/log/ocfmt_logger_test.go @@ -65,7 +65,7 @@ func BenchmarkOCFmtLoggerSimple(b *testing.B) { benchmarkRunnerKitlog(b, log.NewOCFmtLogger(io.Discard), baseMessage) } -func BenchmarkTMFmtLoggerContextual(b *testing.B) { +func BenchmarkOCFmtLoggerContextual(b *testing.B) { benchmarkRunnerKitlog(b, log.NewOCFmtLogger(io.Discard), withMessage) } From 8b6b7a681a9833b1f72aa9bcf83b2238ff9c510e Mon Sep 17 00:00:00 2001 From: Toshimasa Nasu Date: Thu, 22 Dec 2022 12:40:30 +0900 Subject: [PATCH 6/6] Update test/e2e/generator/main_test.go --- test/e2e/generator/main_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/generator/main_test.go b/test/e2e/generator/main_test.go index c645c2af0..3a55525ea 100644 --- a/test/e2e/generator/main_test.go +++ b/test/e2e/generator/main_test.go @@ -8,7 +8,7 @@ import ( ) func TestNewCLI(t *testing.T) { - tempDir, err := ioutil.TempDir("", "runner") + tempDir, err := ioutil.TempDir("", "generator") require.NoError(t, err) defer os.RemoveAll(tempDir) //nolint:staticcheck cmd := NewCLI()