Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(baseapp): signal then panic at halt-height #338

Merged
merged 4 commits into from
Aug 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion CHANGELOG-Agoric.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ Ref: https://keepachangelog.com/en/1.0.0/

* (auth, bank) Agoric/agoric-sdk#8989 Remove deprecated lien support

### Bug Fixes

* (baseapp) [#338](https://github.com/agoric-labs/cosmos-sdk/pull/338) Make sure we don't execute blocks beyond the halt height. Restored from [#305](https://github.com/agoric-labs/cosmos-sdk/pull/305) but compatible with older `SIGINT`, `SIGTERM` logic

## `v0.46.16-alpha.agoric.2.4` - 2024-04-19

### Improvements
Expand Down Expand Up @@ -99,7 +103,7 @@ Ref: https://keepachangelog.com/en/1.0.0/

### Bug Fixes

* (baseapp) [#337](https://github.com/agoric-labs/cosmos-sdk/pull/337) revert #305 which causes test failures in agoric-sdk
* (baseapp) [#337](https://github.com/agoric-labs/cosmos-sdk/pull/337) revert [#305](https://github.com/agoric-labs/cosmos-sdk/pull/305) which causes test failures in agoric-sdk

## `v0.45.16-alpha.agoric.1` - 2023-09-22

Expand Down
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ test-cover:

test-rosetta:
docker build -t rosetta-ci:latest -f contrib/rosetta/rosetta-ci/Dockerfile .
docker-compose -f contrib/rosetta/docker-compose.yaml up --abort-on-container-exit --exit-code-from test_rosetta --build
docker compose -f contrib/rosetta/docker-compose.yaml up --abort-on-container-exit --exit-code-from test_rosetta --build
.PHONY: test-rosetta

benchmark:
Expand Down Expand Up @@ -467,10 +467,10 @@ localnet-build-dlv:
localnet-build-nodes:
$(DOCKER) run --rm -v $(CURDIR)/.testnets:/data cosmossdk/simd \
testnet init-files --v 4 -o /data --starting-ip-address 192.168.10.2 --keyring-backend=test
docker-compose up -d
docker compose up -d

localnet-stop:
docker-compose down
docker compose down

# localnet-start will run a 4-node testnet locally. The nodes are
# based off the docker images in: ./contrib/images/simd-env
Expand Down
83 changes: 43 additions & 40 deletions baseapp/abci.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@
))
}

app.checkHalt(req.Header.Height, req.Header.Time)
Dismissed Show dismissed Hide dismissed

if err := app.validateHeight(req); err != nil {
panic(err)
}
Expand Down Expand Up @@ -272,7 +274,7 @@
// deliverTxWithoutEventHistory is the upstream cosmos-sdk DeliverTx.
res = app.deliverTxWithoutEventHistory(req)
// When successful, remember event history.
if res.Code == sdkerrors.SuccessABCICode {

Check failure on line 277 in baseapp/abci.go

View workflow job for this annotation

GitHub Actions / Analyze

SA1019: sdkerrors.SuccessABCICode is deprecated: functionality of this package has been moved to it's own module: (staticcheck)
app.deliverState.eventHistory = append(app.deliverState.eventHistory, res.Events...)
}
return res
Expand Down Expand Up @@ -312,6 +314,47 @@
}
}

// checkHalt forces a state machine halt and attempts to kill the current
// process if block height or timestamp exceeds halt-height or halt-time
// respectively.
func (app *BaseApp) checkHalt(blockHeight int64, blockTime time.Time) {
var halt bool
if app.haltHeight > 0 && uint64(blockHeight) > app.haltHeight {
// height to halt has passed
halt = true
} else if app.haltTime > 0 && blockTime.Unix() > int64(app.haltTime) {
// time to halt has passed
halt = true
}

if !halt {
return
}

app.logger.Info(
"halt per configuration",
"haltHeight", app.haltHeight,
"haltTime", app.haltTime,
"blockHeight", blockHeight,
"blockTime", blockTime,
)

// [AGORIC] Make a best-effort attempt to kill our process.
p, err := os.FindProcess(os.Getpid())
if err == nil {
// attempt cascading signals in case SIGINT fails (os dependent)
_ = p.Signal(syscall.SIGINT)
_ = p.Signal(syscall.SIGTERM)
// Errors in these signal calls are not meaningful to us. We tried our
// best, but we don't care (and can't tell) if or how the signal handler
// responds.
}

// Prevent the state machine from advancing to the next block, no matter how
// the signals were handled.
panic(errors.New("halt application"))
Dismissed Show dismissed Hide dismissed
}

// Commit implements the ABCI interface. It will commit all state that exists in
// the deliver state's multi-store and includes the resulting commit ID in the
// returned abci.ResponseCommit. Commit will set the check state based on the
Expand Down Expand Up @@ -368,53 +411,13 @@
// empty/reset the deliver state
app.deliverState = nil

var halt bool

switch {
case app.haltHeight > 0 && uint64(header.Height) >= app.haltHeight:
halt = true

case app.haltTime > 0 && header.Time.Unix() >= int64(app.haltTime):
halt = true
}

if halt {
// Halt the binary and allow Tendermint to receive the ResponseCommit
// response with the commit ID hash. This will allow the node to successfully
// restart and process blocks assuming the halt configuration has been
// reset or moved to a more distant value.
app.halt()
}

if app.snapshotManager.ShouldTakeSnapshot(header.Height) {
snapshotHeight = header.Height
}

return res, snapshotHeight
}

// halt logs the configured halt height and time and then tries to shut the
// node down gracefully by signalling the current process with SIGINT followed
// by SIGTERM. If the process cannot be looked up, or neither signal can be
// sent, it falls back to exiting immediately via os.Exit(0).
func (app *BaseApp) halt() {
	app.logger.Info("halting node per configuration", "height", app.haltHeight, "time", app.haltTime)

	if self, findErr := os.FindProcess(os.Getpid()); findErr == nil {
		// Both signals are always sent, in cascade, in case SIGINT fails
		// (os dependent).
		intErr := self.Signal(syscall.SIGINT)
		termErr := self.Signal(syscall.SIGTERM)

		// A single successfully sent signal is enough; return and leave the
		// shutdown to whatever handles it.
		if !(intErr != nil && termErr != nil) {
			return
		}
	}

	// The process could not be found or could not be signalled via
	// SIGINT/SIGTERM, so exit right away.
	app.logger.Info("failed to send SIGINT/SIGTERM; exiting...")
	os.Exit(0)
}

// Snapshot takes a snapshot of the current state and prunes any old snapshots.
// It should be started as a goroutine.
func (app *BaseApp) Snapshot(height int64) {
Expand Down
73 changes: 73 additions & 0 deletions baseapp/abci_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@ package baseapp
import (
"encoding/json"
"fmt"
"os"
"os/signal"
"strings"
"syscall"
"testing"
"time"

"github.com/stretchr/testify/require"
abci "github.com/tendermint/tendermint/abci/types"
Expand Down Expand Up @@ -224,3 +229,71 @@ func (ps *paramStore) Get(_ sdk.Context, key []byte, ptr interface{}) {
panic(err)
}
}

// TestABCI_HaltChain checks the halt behaviour of BeginBlock for a table of
// halt-height / halt-time configurations. For blocks strictly past the
// configured halt point it expects the process to be signalled with SIGINT and
// then SIGTERM, and BeginBlock to panic with an error whose message starts
// with "halt application". For blocks at or before the halt point (or with no
// halt configured) it expects BeginBlock to return without panicking.
func TestABCI_HaltChain(t *testing.T) {
	logger := defaultLogger()
	db := dbm.NewMemDB()
	name := t.Name()

	testCases := []struct {
		name        string
		haltHeight  uint64
		haltTime    uint64
		blockHeight int64
		blockTime   int64
		expHalt     bool
	}{
		{"default", 0, 0, 10, 0, false},
		{"halt-height-edge", 10, 0, 10, 0, false},
		{"halt-height", 10, 0, 11, 0, true},
		{"halt-time-edge", 0, 10, 1, 10, false},
		{"halt-time", 0, 10, 1, 11, true},
	}

	// Upper bound on how long to wait for each expected signal, so that a
	// missing signal fails the test instead of blocking until the global test
	// timeout.
	const signalTimeout = 5 * time.Second

	sigs := make(chan os.Signal, 5)
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			if tc.expHalt {
				// Intercept the signals the application sends to itself so
				// they do not terminate the test binary.
				signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
			}

			defer func() {
				rec := recover()
				signal.Stop(sigs)

				// Convert the recovered value into an error without assuming
				// its dynamic type; a non-error panic value must produce a
				// readable failure rather than a second panic here.
				var err error
				switch v := rec.(type) {
				case nil:
					// No panic occurred.
				case error:
					err = v
				default:
					err = fmt.Errorf("%v", v)
				}

				if !tc.expHalt {
					require.NoError(t, err)
					return
				}

				// Ensure that we received the correct signals, in order.
				for _, want := range []os.Signal{syscall.SIGINT, syscall.SIGTERM} {
					select {
					case got := <-sigs:
						require.Equal(t, want, got)
					case <-time.After(signalTimeout):
						t.Fatalf("timed out waiting for signal %v", want)
					}
				}
				require.Equal(t, 0, len(sigs))

				// Check our error message.
				require.Error(t, err)
				require.True(t, strings.HasPrefix(err.Error(), "halt application"))
			}()

			app := NewBaseApp(
				name, logger, db, nil,
				SetHaltHeight(tc.haltHeight),
				SetHaltTime(tc.haltTime),
			)

			app.InitChain(abci.RequestInitChain{
				InitialHeight: tc.blockHeight,
			})

			app.BeginBlock(abci.RequestBeginBlock{
				Header: tmproto.Header{
					Height: tc.blockHeight,
					Time:   time.Unix(tc.blockTime, 0),
				},
			})
		})
	}
}
Loading