From 68e5e08f5af1fd8a68cc7d5a6fbb2778f6e4fb9c Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 12:22:29 +0900 Subject: [PATCH 01/13] Reduce p2p log noise (#7465) * reduce some logs * reduce error logs * remove debug Co-authored-by: Marko --- consensus/reactor.go | 4 ---- p2p/pex/pex_reactor.go | 8 +++----- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/consensus/reactor.go b/consensus/reactor.go index b85073b99..5f3d2de8b 100644 --- a/consensus/reactor.go +++ b/consensus/reactor.go @@ -500,7 +500,6 @@ OUTER_LOOP: for { // Manage disconnects from self or peer. if !peer.IsRunning() || !conR.IsRunning() { - logger.Info("Stopping gossipDataRoutine for peer") return } rs := conR.conS.GetRoundState() @@ -644,7 +643,6 @@ OUTER_LOOP: for { // Manage disconnects from self or peer. if !peer.IsRunning() || !conR.IsRunning() { - logger.Info("Stopping gossipVotesRoutine for peer") return } rs := conR.conS.GetRoundState() @@ -771,13 +769,11 @@ func (conR *Reactor) gossipVotesForHeight( // NOTE: `queryMaj23Routine` has a simple crude design since it only comes // into play for liveness when there's a signature DDoS attack happening. func (conR *Reactor) queryMaj23Routine(peer p2p.Peer, ps *PeerState) { - logger := conR.Logger.With("peer", peer) OUTER_LOOP: for { // Manage disconnects from self or peer. 
if !peer.IsRunning() || !conR.IsRunning() { - logger.Info("Stopping queryMaj23Routine for peer") return } diff --git a/p2p/pex/pex_reactor.go b/p2p/pex/pex_reactor.go index 4008e6dc5..54fb92957 100644 --- a/p2p/pex/pex_reactor.go +++ b/p2p/pex/pex_reactor.go @@ -397,7 +397,6 @@ func (r *Reactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { // If this address came from a seed node, try to connect to it without // waiting (#2093) if srcIsSeed { - r.Logger.Info("Will dial address, which came from seed", "addr", netAddr, "seed", srcAddr) go func(addr *p2p.NetAddress) { err := r.dialPeer(addr) if err != nil { @@ -405,7 +404,7 @@ func (r *Reactor) ReceiveAddrs(addrs []*p2p.NetAddress, src Peer) error { case errMaxAttemptsToDial, errTooEarlyToDial, p2p.ErrCurrentlyDialingOrExistingAddress: r.Logger.Debug(err.Error(), "addr", addr) default: - r.Logger.Error(err.Error(), "addr", addr) + r.Logger.Debug(err.Error(), "addr", addr) } } }(netAddr) @@ -501,7 +500,6 @@ func (r *Reactor) ensurePeers() { // TODO: consider moving some checks from toDial into here // so we don't even consider dialing peers that we want to wait // before dialling again, or have dialed too many times already - r.Logger.Info("Will dial address", "addr", try) toDial[try.ID] = try } @@ -514,7 +512,7 @@ func (r *Reactor) ensurePeers() { case errMaxAttemptsToDial, errTooEarlyToDial: r.Logger.Debug(err.Error(), "addr", addr) default: - r.Logger.Error(err.Error(), "addr", addr) + r.Logger.Debug(err.Error(), "addr", addr) } } }(addr) @@ -727,7 +725,7 @@ func (r *Reactor) crawlPeers(addrs []*p2p.NetAddress) { case errMaxAttemptsToDial, errTooEarlyToDial, p2p.ErrCurrentlyDialingOrExistingAddress: r.Logger.Debug(err.Error(), "addr", addr) default: - r.Logger.Error(err.Error(), "addr", addr) + r.Logger.Debug(err.Error(), "addr", addr) } continue } From 37a574c8a578def660eff37a8fd4f9b57909989e Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 12:29:49 +0900 Subject: [PATCH 02/13] consensus: 
calculate prevote message delay metric (backport #7551) (#7617) * consensus: calculate prevote message delay metric (#7551) This pull request adds two metrics intended for use in calculating an experimental value for `MessageDelay`. The metrics are as follows: ``` tendermint_consensus_complete_prevote_message_delay{chain_id="test-chain-aZbwF1"} 0.013025505 tendermint_consensus_quorum_prevote_message_delay{chain_id="test-chain-aZbwF1"} 0.013025505 ``` For more information on what these metrics are calculating, see #7202. The aim is to backport these metrics to v0.34 and run nodes on a few popular chains with these metrics to determine the experimental values for `MessageDelay` on these popular chains and use these to select our default `SynchronyParams.MessageDelay` value. Gauges allow us to overwrite the metric on each successive observation. We can then capture these metrics over time to track the highest and lowest observed value. (cherry picked from commit 0c82ceaa5f7964c13247af9b64d72477af9dc973) * fix merge conflicts Co-authored-by: William Banfield <4561443+williambanfield@users.noreply.github.com> Co-authored-by: William Banfield --- consensus/metrics.go | 48 ++++++++++++++++++++++++++++++++++++-------- consensus/state.go | 23 +++++++++++++++++++++ types/vote_set.go | 17 ++++++++++++++++ 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/consensus/metrics.go b/consensus/metrics.go index 68bd35417..4b79b80d6 100644 --- a/consensus/metrics.go +++ b/consensus/metrics.go @@ -4,7 +4,7 @@ import ( "github.com/go-kit/kit/metrics" "github.com/go-kit/kit/metrics/discard" - prometheus "github.com/go-kit/kit/metrics/prometheus" + "github.com/go-kit/kit/metrics/prometheus" stdprometheus "github.com/prometheus/client_golang/prometheus" ) @@ -66,6 +66,22 @@ type Metrics struct { // Number of blockparts transmitted by peer.
BlockParts metrics.Counter + // QuorumPrevoteMessageDelay is the interval in seconds between the proposal + // timestamp and the timestamp of the earliest prevote that achieved a quorum + // during the prevote step. + // + // To compute it, sum the voting power over each prevote received, in increasing + // order of timestamp. The timestamp of the first prevote to increase the sum to + // be above 2/3 of the total voting power of the network defines + // the endpoint of the interval. Subtract the proposal timestamp from this endpoint + // to obtain the quorum delay. + QuorumPrevoteMessageDelay metrics.Gauge + + // FullPrevoteMessageDelay is the interval in seconds between the proposal + // timestamp and the timestamp of the latest prevote in a round where 100% + // of the voting power on the network issued prevotes. + FullPrevoteMessageDelay metrics.Gauge + // //////////////////////////////////// // Metrics for measuring performance // //////////////////////////////////// @@ -230,6 +246,20 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics { Name: "block_parts", Help: "Number of blockparts transmitted by peer.", }, append(labels, "peer_id")).With(labelsAndValues...), + QuorumPrevoteMessageDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "quorum_prevote_message_delay", + Help: "Difference in seconds between the proposal timestamp and the timestamp " + + "of the latest prevote that achieved a quorum in the prevote step.", + }, labels).With(labelsAndValues...), + FullPrevoteMessageDelay: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: MetricsSubsystem, + Name: "full_prevote_message_delay", + Help: "Difference in seconds between the proposal timestamp and the timestamp " + + "of the latest prevote that achieved 100% of the voting power in the prevote step.", + }, labels).With(labelsAndValues...), MissingProposal:
prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{ Namespace: namespace, Subsystem: MetricsSubsystem, @@ -359,13 +389,15 @@ func NopMetrics() *Metrics { BlockIntervalSeconds: discard.NewGauge(), - NumTxs: discard.NewGauge(), - BlockSizeBytes: discard.NewGauge(), - TotalTxs: discard.NewGauge(), - CommittedHeight: discard.NewGauge(), - FastSyncing: discard.NewGauge(), - StateSyncing: discard.NewGauge(), - BlockParts: discard.NewCounter(), + NumTxs: discard.NewGauge(), + BlockSizeBytes: discard.NewGauge(), + TotalTxs: discard.NewGauge(), + CommittedHeight: discard.NewGauge(), + FastSyncing: discard.NewGauge(), + StateSyncing: discard.NewGauge(), + BlockParts: discard.NewCounter(), + QuorumPrevoteMessageDelay: discard.NewGauge(), + FullPrevoteMessageDelay: discard.NewGauge(), MissingProposal: discard.NewGauge(), RoundFailures: discard.NewHistogram(), diff --git a/consensus/state.go b/consensus/state.go index 0bcc8bb41..96c1622a6 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -6,6 +6,7 @@ import ( "io/ioutil" "os" "runtime/debug" + "sort" "time" "github.com/gogo/protobuf/proto" @@ -1622,6 +1623,8 @@ func (cs *State) finalizeCommit(height int64) { return } + cs.calculatePrevoteMessageDelayMetrics() + blockID, ok := cs.Votes.Precommits(cs.CommitRound).TwoThirdsMajority() block, blockParts := cs.ProposalBlock, cs.ProposalBlockParts @@ -2393,6 +2396,26 @@ func (cs *State) checkDoubleSigningRisk(height int64) error { return nil } +func (cs *State) calculatePrevoteMessageDelayMetrics() { + ps := cs.Votes.Prevotes(cs.Round) + pl := ps.List() + sort.Slice(pl, func(i, j int) bool { + return pl[i].Timestamp.Before(pl[j].Timestamp) + }) + var votingPowerSeen int64 + for _, v := range pl { + _, val := cs.Validators.GetByAddress(v.ValidatorAddress) + votingPowerSeen += val.VotingPower + if votingPowerSeen >= cs.Validators.TotalVotingPower()*2/3+1 { + cs.metrics.QuorumPrevoteMessageDelay.Set(v.Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) + break + } + } + if ps.HasAll() 
{ + cs.metrics.FullPrevoteMessageDelay.Set(pl[len(pl)-1].Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) + } +} + //--------------------------------------------------------- func CompareHRS(h1 int64, r1 int32, s1 cstypes.RoundStepType, h2 int64, r2 int32, s2 cstypes.RoundStepType) int { diff --git a/types/vote_set.go b/types/vote_set.go index ef94c63c0..8a53f6e92 100644 --- a/types/vote_set.go +++ b/types/vote_set.go @@ -407,6 +407,20 @@ func (voteSet *VoteSet) GetByIndex(valIndex int32) *Vote { return voteSet.votes[valIndex] } +// List returns a copy of the list of votes stored by the VoteSet. +func (voteSet *VoteSet) List() []Vote { + if voteSet == nil || voteSet.votes == nil { + return nil + } + votes := make([]Vote, 0, len(voteSet.votes)) + for i := range voteSet.votes { + if voteSet.votes[i] != nil { + votes = append(votes, *voteSet.votes[i]) + } + } + return votes +} + func (voteSet *VoteSet) GetByAddress(address []byte) *Vote { if voteSet == nil { return nil @@ -452,6 +466,9 @@ func (voteSet *VoteSet) HasTwoThirdsAny() bool { } func (voteSet *VoteSet) HasAll() bool { + if voteSet == nil { + return false + } voteSet.mtx.Lock() defer voteSet.mtx.Unlock() return voteSet.sum == voteSet.voterSet.TotalVotingPower() From c6d0882eb7d374410d7fd564fddff09e9ef70308 Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 13:00:42 +0900 Subject: [PATCH 03/13] Fix compile error: `consensus: check proposal non-nil in prevote message delay metric (backport #7625) (#7631)` --- consensus/state.go | 6 +++--- types/validator_set.go | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/consensus/state.go b/consensus/state.go index 96c1622a6..ea0212f91 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -2404,9 +2404,9 @@ func (cs *State) calculatePrevoteMessageDelayMetrics() { }) var votingPowerSeen int64 for _, v := range pl { - _, val := cs.Validators.GetByAddress(v.ValidatorAddress) - votingPowerSeen += val.VotingPower - if votingPowerSeen >= 
cs.Validators.TotalVotingPower()*2/3+1 { + _, voter := cs.Voters.GetByAddress(v.ValidatorAddress) + votingPowerSeen += voter.VotingPower + if votingPowerSeen >= cs.Voters.TotalVotingPower()*2/3+1 { cs.metrics.QuorumPrevoteMessageDelay.Set(v.Timestamp.Sub(cs.Proposal.Timestamp).Seconds()) break } diff --git a/types/validator_set.go b/types/validator_set.go index 9074cdf89..ed16f98dc 100644 --- a/types/validator_set.go +++ b/types/validator_set.go @@ -331,8 +331,8 @@ func (vals *ValidatorSet) updateTotalStakingPower() { vals.totalStakingPower = sum } -// TotalStakingPower returns the sum of the voting powers of all validators. -// It recomputes the total voting power if required. +// TotalStakingPower returns the sum of the staking powers of all validators. +// It recomputes the total staking power if required. func (vals *ValidatorSet) TotalStakingPower() int64 { if vals.totalStakingPower == 0 { vals.updateTotalStakingPower() From 946a63ebc1e332ae2e16f4e9abce8b5f001dc4c8 Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 15:40:01 +0900 Subject: [PATCH 04/13] Fix codecov: `consensus: calculate prevote message delay metric (backport #7551) (#7617)` --- types/vote_set_test.go | 47 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/types/vote_set_test.go b/types/vote_set_test.go index a811be075..07d903c69 100644 --- a/types/vote_set_test.go +++ b/types/vote_set_test.go @@ -56,6 +56,30 @@ func randVoteSetForPrivKeys( return NewVoteSet("test_chain_id", height, round, signedMsgType, voterSet), vals, voterSet, privValidators } +func addVoteByAllVoterSet(t *testing.T, voteSet *VoteSet, privVals []PrivValidator, height int64, round int32) []Vote { + votes := make([]Vote, voteSet.Size()) + for i, voter := range voteSet.voterSet.Voters { + vote := &Vote{ + Type: tmproto.PrevoteType, + Height: height, + Round: round, + BlockID: randBlockID(), + Timestamp: tmtime.Now(), + ValidatorAddress: voter.Address, + ValidatorIndex: int32(i), + 
} + v := vote.ToProto() + err := privVals[i].SignVote(voteSet.chainID, v) + require.NoError(t, err) + vote.Signature = v.Signature + added, err := voteSet.AddVote(vote) + require.NoError(t, err) + require.True(t, added) + votes[i] = *vote + } + return votes +} + // Convenience: Return new vote with different validator address/index func withValidator(vote *Vote, addr []byte, idx int32) *Vote { vote = vote.Copy() @@ -207,6 +231,29 @@ func TestVoteSet_AddVote_Bad(t *testing.T) { } } +func TestVoteSet_List(t *testing.T) { + height, round := int64(1), int32(0) + voteSet, _, _, privVals := randVoteSet(height, round, tmproto.PrevoteType, 10, 1) + + votes := addVoteByAllVoterSet(t, voteSet, privVals, height, round) + assert.Equal(t, votes, voteSet.List()) + + voteSet = nil + assert.Nil(t, voteSet.List()) +} + +func TestVoteSet_HasAll(t *testing.T) { + height, round := int64(1), int32(0) + voteSet, _, _, privVals := randVoteSet(height, round, tmproto.PrevoteType, 10, 1) + assert.False(t, voteSet.HasAll()) + + addVoteByAllVoterSet(t, voteSet, privVals, height, round) + assert.True(t, voteSet.HasAll()) + + voteSet = nil + assert.False(t, voteSet.HasAll()) +} + func TestVoteSet_2_3Majority(t *testing.T) { height, round := int64(1), int32(0) voteSet, _, _, privValidators := randVoteSet(height, round, tmproto.PrevoteType, 10, 1) From 22f6ec34354a1a0131cac8eb2d35f800620914da Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 12:32:26 +0900 Subject: [PATCH 05/13] consensus: check proposal non-nil in prevote message delay metric (backport #7625) (#7631) * consensus: check proposal non-nil in prevote message delay metric (#7625) (cherry picked from commit b6307c42e095c6f8e9e7c2518fb1004cc8f201a1) * fix merge conflicts Co-authored-by: William Banfield <4561443+williambanfield@users.noreply.github.com> Co-authored-by: William Banfield --- consensus/state.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/consensus/state.go b/consensus/state.go index 
ea0212f91..0fab98e7b 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -2397,11 +2397,17 @@ func (cs *State) checkDoubleSigningRisk(height int64) error { } func (cs *State) calculatePrevoteMessageDelayMetrics() { + if cs.Proposal == nil { + return + } + ps := cs.Votes.Prevotes(cs.Round) pl := ps.List() + sort.Slice(pl, func(i, j int) bool { return pl[i].Timestamp.Before(pl[j].Timestamp) }) + var votingPowerSeen int64 for _, v := range pl { _, voter := cs.Voters.GetByAddress(v.ValidatorAddress) From 17bca34e4c8ec6ee715ef6c3f5d4d97e4051c783 Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 12:49:19 +0900 Subject: [PATCH 06/13] fix app hash in state rollback (backport #7837) (#7881) When testing rollback feature in the Cosmos SDK, we found that the app hash in Tendermint after rollback was the value after the latest block, rather than before it. Co-authored-by: Callum Waters Co-authored-by: yihuang (cherry picked from commit 8a238fdcb44461a4e029c8e7a4790c2d470d282b) Inline factory function that does not exist in this branch. Co-authored-by: M. J. Fromberger --- state/rollback.go | 10 ++++++++-- state/rollback_test.go | 16 ++++++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/state/rollback.go b/state/rollback.go index 2e1b5a904..f46fd7eed 100644 --- a/state/rollback.go +++ b/state/rollback.go @@ -43,6 +43,12 @@ func Rollback(bs BlockStore, ss Store) (int64, []byte, error) { if rollbackBlock == nil { return -1, nil, fmt.Errorf("block at height %d not found", rollbackHeight) } + // We also need to retrieve the latest block because the app hash and last + // results hash is only agreed upon in the following block. 
+ latestBlock := bs.LoadBlockMeta(invalidState.LastBlockHeight) + if latestBlock == nil { + return -1, nil, fmt.Errorf("block at height %d not found", invalidState.LastBlockHeight) + } _, prevVoterSet, _, _, err := ss.LoadVoters(rollbackHeight, nil) if err != nil { @@ -99,8 +105,8 @@ func Rollback(bs BlockStore, ss Store) (int64, []byte, error) { ConsensusParams: previousParams, LastHeightConsensusParamsChanged: paramsChangeHeight, - LastResultsHash: rollbackBlock.Header.LastResultsHash, - AppHash: rollbackBlock.Header.AppHash, + LastResultsHash: latestBlock.Header.LastResultsHash, + AppHash: latestBlock.Header.AppHash, } // persist the new state. This overrides the invalid one. NOTE: this will also diff --git a/state/rollback_test.go b/state/rollback_test.go index 73db1c1e2..32528bd34 100644 --- a/state/rollback_test.go +++ b/state/rollback_test.go @@ -7,6 +7,7 @@ import ( dbm "github.com/line/tm-db/v2/memdb" "github.com/stretchr/testify/require" + "github.com/line/ostracon/crypto" "github.com/line/ostracon/crypto/tmhash" tmstate "github.com/line/ostracon/proto/ostracon/state" tmversion "github.com/line/ostracon/proto/ostracon/version" @@ -56,12 +57,22 @@ func TestRollback(t *testing.T) { BlockID: initialState.LastBlockID, Header: types.Header{ Height: initialState.LastBlockHeight, - AppHash: initialState.AppHash, + AppHash: crypto.CRandBytes(tmhash.Size), LastBlockID: makeBlockIDRandom(), LastResultsHash: initialState.LastResultsHash, }, } - blockStore.On("LoadBlockMeta", initialState.LastBlockHeight).Return(block) + nextBlock := &types.BlockMeta{ + BlockID: initialState.LastBlockID, + Header: types.Header{ + Height: nextState.LastBlockHeight, + AppHash: initialState.AppHash, + LastBlockID: block.BlockID, + LastResultsHash: nextState.LastResultsHash, + }, + } + blockStore.On("LoadBlockMeta", height).Return(block) + blockStore.On("LoadBlockMeta", nextHeight).Return(nextBlock) blockStore.On("Height").Return(nextHeight) // rollback the state @@ -91,6 +102,7 @@ func 
TestRollbackNoBlocks(t *testing.T) { stateStore := setupStateStore(t, height) blockStore := &mocks.BlockStore{} blockStore.On("Height").Return(height) + blockStore.On("LoadBlockMeta", height).Return(nil) blockStore.On("LoadBlockMeta", height-1).Return(nil) _, _, err := state.Rollback(blockStore, stateStore) From 218e5cd29c52e56d6aea810ad68935198fd4a576 Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 15:42:21 +0900 Subject: [PATCH 07/13] Fix codecov: `fix app hash in state rollback (backport #7837) (#7881)` --- state/rollback.go | 4 ++-- state/rollback_test.go | 12 +++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/state/rollback.go b/state/rollback.go index f46fd7eed..a80c34fc8 100644 --- a/state/rollback.go +++ b/state/rollback.go @@ -41,13 +41,13 @@ func Rollback(bs BlockStore, ss Store) (int64, []byte, error) { rollbackHeight := invalidState.LastBlockHeight - 1 rollbackBlock := bs.LoadBlockMeta(rollbackHeight) if rollbackBlock == nil { - return -1, nil, fmt.Errorf("block at height %d not found", rollbackHeight) + return -1, nil, fmt.Errorf("block at RollbackHeight %d not found", rollbackHeight) } // We also need to retrieve the latest block because the app hash and last // results hash is only agreed upon in the following block. 
latestBlock := bs.LoadBlockMeta(invalidState.LastBlockHeight) if latestBlock == nil { - return -1, nil, fmt.Errorf("block at height %d not found", invalidState.LastBlockHeight) + return -1, nil, fmt.Errorf("block at LastBlockHeight %d not found", invalidState.LastBlockHeight) } _, prevVoterSet, _, _, err := ss.LoadVoters(rollbackHeight, nil) diff --git a/state/rollback_test.go b/state/rollback_test.go index 32528bd34..7daf69253 100644 --- a/state/rollback_test.go +++ b/state/rollback_test.go @@ -102,12 +102,18 @@ func TestRollbackNoBlocks(t *testing.T) { stateStore := setupStateStore(t, height) blockStore := &mocks.BlockStore{} blockStore.On("Height").Return(height) - blockStore.On("LoadBlockMeta", height).Return(nil) - blockStore.On("LoadBlockMeta", height-1).Return(nil) + blockStore.On("LoadBlockMeta", height-1).Once().Return(nil) _, _, err := state.Rollback(blockStore, stateStore) require.Error(t, err) - require.Contains(t, err.Error(), "block at height 99 not found") + require.Contains(t, err.Error(), "block at RollbackHeight 99 not found") + + blockStore.On("LoadBlockMeta", height-1).Once().Return(&types.BlockMeta{}) + blockStore.On("LoadBlockMeta", height).Return(nil) + + _, _, err = state.Rollback(blockStore, stateStore) + require.Error(t, err) + require.Contains(t, err.Error(), "block at LastBlockHeight 100 not found") } func TestRollbackDifferentStateHeight(t *testing.T) { From 5b5933dd721e50a5936cebf474f7561005bd7534 Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 17:43:46 +0900 Subject: [PATCH 08/13] docs: fix cosmos theme version. (#7967) The various package locks got out of sync, reunify them. Co-authored-by: M. J. 
Fromberger --- docs/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/package.json b/docs/package.json index 58c366345..49b918e6b 100644 --- a/docs/package.json +++ b/docs/package.json @@ -4,7 +4,7 @@ "description": "Ostracon Documentation", "main": "index.js", "dependencies": { - "vuepress-theme-cosmos": "^1.0.180" + "vuepress-theme-cosmos": "^1.0.183" }, "devDependencies": { "watchpack": "^2.1.1" From 43a356c4fcc2040689a1794f2e1624cb291e46ee Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 17:53:14 +0900 Subject: [PATCH 09/13] statesync: assert app version matches (backport #7856) (#7885) --- statesync/reactor_test.go | 2 +- statesync/stateprovider.go | 5 +++++ statesync/syncer.go | 36 ++++++++++++++++++++++-------------- statesync/syncer_test.go | 30 ++++++++++++++++-------------- test/e2e/app/app.go | 5 ++++- test/e2e/runner/setup.go | 2 ++ 6 files changed, 50 insertions(+), 30 deletions(-) diff --git a/statesync/reactor_test.go b/statesync/reactor_test.go index d4505276b..82abe3090 100644 --- a/statesync/reactor_test.go +++ b/statesync/reactor_test.go @@ -257,7 +257,7 @@ func makeTestStateAndCommit(appHash string, height int64) (sm.State, *types.Comm Version: tmstate.Version{ Consensus: tmversion.Consensus{ Block: version.BlockProtocol, - App: 0, + App: 9, }, Software: version.OCCoreSemVer, diff --git a/statesync/stateprovider.go b/statesync/stateprovider.go index 2d60b4352..cea707b94 100644 --- a/statesync/stateprovider.go +++ b/statesync/stateprovider.go @@ -18,6 +18,7 @@ import ( rpchttp "github.com/line/ostracon/rpc/client/http" sm "github.com/line/ostracon/state" "github.com/line/ostracon/types" + "github.com/line/ostracon/version" "github.com/line/tm-db/v2/memdb" ) @@ -171,6 +172,10 @@ func (s *lightClientStateProvider) State(ctx context.Context, height uint64) (sm return sm.State{}, err } + state.Version = tmstate.Version{ + Consensus: currentLightBlock.Version, + Software: version.OCCoreSemVer, + } 
state.LastBlockHeight = lastLightBlock.Height state.LastBlockTime = lastLightBlock.Time state.LastBlockID = lastLightBlock.Commit.BlockID diff --git a/statesync/syncer.go b/statesync/syncer.go index bd2fd0001..983321aba 100644 --- a/statesync/syncer.go +++ b/statesync/syncer.go @@ -314,13 +314,10 @@ func (s *syncer) Sync(snapshot *snapshot, chunks *chunkQueue) (sm.State, sm.Stat return sm.State{}, sm.State{}, nil, err } - // Verify app and update app version - appVersion, err := s.verifyApp(snapshot) - if err != nil { + // Verify app and app version + if err := s.verifyApp(snapshot, state.Version.Consensus.App); err != nil { return sm.State{}, sm.State{}, nil, err } - state.Version.Consensus.App = appVersion - previousState.Version.Consensus.App = appVersion // Done! 🎉 s.logger.Info("Snapshot restored", "height", snapshot.Height, "format", snapshot.Format, @@ -490,25 +487,36 @@ func (s *syncer) requestChunk(snapshot *snapshot, chunk uint32) { })) } -// verifyApp verifies the sync, checking the app hash and last block height. It returns the -// app version, which should be returned as part of the initial state. -func (s *syncer) verifyApp(snapshot *snapshot) (uint64, error) { +// verifyApp verifies the sync, checking the app hash, last block height and app version +func (s *syncer) verifyApp(snapshot *snapshot, appVersion uint64) error { resp, err := s.connQuery.InfoSync(proxy.RequestInfo) if err != nil { - return 0, fmt.Errorf("failed to query ABCI app for appHash: %w", err) + return fmt.Errorf("failed to query ABCI app for appHash: %w", err) + } + + // sanity check that the app version in the block matches the application's own record + // of its version + if resp.AppVersion != appVersion { + // An error here most likely means that the app hasn't implemented state sync + // or the Info call correctly + return fmt.Errorf("app version mismatch.
Expected: %d, got: %d", + appVersion, resp.AppVersion) } if !bytes.Equal(snapshot.trustedAppHash, resp.LastBlockAppHash) { s.logger.Error("appHash verification failed", "expected", snapshot.trustedAppHash, "actual", resp.LastBlockAppHash) - return 0, errVerifyFailed + return errVerifyFailed } if uint64(resp.LastBlockHeight) != snapshot.Height { - s.logger.Error("ABCI app reported unexpected last block height", - "expected", snapshot.Height, "actual", resp.LastBlockHeight) - return 0, errVerifyFailed + s.logger.Error( + "ABCI app reported unexpected last block height", + "expected", snapshot.Height, + "actual", resp.LastBlockHeight, + ) + return errVerifyFailed } s.logger.Info("Verified ABCI app", "height", snapshot.Height, "appHash", snapshot.trustedAppHash) - return resp.AppVersion, nil + return nil } diff --git a/statesync/syncer_test.go b/statesync/syncer_test.go index 7cb8979cb..d6e663490 100644 --- a/statesync/syncer_test.go +++ b/statesync/syncer_test.go @@ -28,6 +28,8 @@ import ( "github.com/line/ostracon/version" ) +const testAppVersion = 9 + // Sets up a basic syncer that can be used to test OfferSnapshot requests func setupOfferSyncer(t *testing.T) (*syncer, *proxymocks.AppConnSnapshot) { connQuery := &proxymocks.AppConnQuery{} @@ -53,7 +55,7 @@ func TestSyncer_SyncAny(t *testing.T) { Version: tmstate.Version{ Consensus: tmversion.Consensus{ Block: version.BlockProtocol, - App: 0, + App: testAppVersion, }, Software: version.OCCoreSemVer, @@ -189,7 +191,7 @@ func TestSyncer_SyncAny(t *testing.T) { Index: 2, Chunk: []byte{1, 1, 2}, }).Once().Return(&abci.ResponseApplySnapshotChunk{Result: abci.ResponseApplySnapshotChunk_ACCEPT}, nil) connQuery.On("InfoSync", proxy.RequestInfo).Return(&abci.ResponseInfo{ - AppVersion: 9, + AppVersion: testAppVersion, LastBlockHeight: 1, LastBlockAppHash: []byte("app_hash"), }, nil) @@ -203,12 +205,8 @@ func TestSyncer_SyncAny(t *testing.T) { assert.Equal(t, map[uint32]int{0: 1, 1: 2, 2: 1}, chunkRequests) 
chunkRequestsMtx.Unlock() - // The syncer should have updated the state app version from the ABCI info response. expectState := state - expectState.Version.Consensus.App = 9 - expectPreviousState := sm.State{} - expectPreviousState.Version.Consensus.App = expectState.Version.Consensus.App assert.Equal(t, expectState, newState) assert.Equal(t, expectPreviousState, previousState) @@ -622,6 +620,8 @@ func TestSyncer_applyChunks_RejectSenders(t *testing.T) { func TestSyncer_verifyApp(t *testing.T) { boom := errors.New("boom") + const appVersion = 9 + appVersionMismatchErr := errors.New("app version mismatch. Expected: 9, got: 2") s := &snapshot{Height: 3, Format: 1, Chunks: 5, Hash: []byte{1, 2, 3}, trustedAppHash: []byte("app_hash")} testcases := map[string]struct { @@ -632,17 +632,22 @@ func TestSyncer_verifyApp(t *testing.T) { "verified": {&abci.ResponseInfo{ LastBlockHeight: 3, LastBlockAppHash: []byte("app_hash"), - AppVersion: 9, + AppVersion: appVersion, }, nil, nil}, + "invalid app version": {&abci.ResponseInfo{ + LastBlockHeight: 3, + LastBlockAppHash: []byte("app_hash"), + AppVersion: 2, + }, nil, appVersionMismatchErr}, "invalid height": {&abci.ResponseInfo{ LastBlockHeight: 5, LastBlockAppHash: []byte("app_hash"), - AppVersion: 9, + AppVersion: appVersion, }, nil, errVerifyFailed}, "invalid hash": {&abci.ResponseInfo{ LastBlockHeight: 3, LastBlockAppHash: []byte("xxx"), - AppVersion: 9, + AppVersion: appVersion, }, nil, errVerifyFailed}, "error": {nil, boom, boom}, } @@ -657,15 +662,12 @@ func TestSyncer_verifyApp(t *testing.T) { syncer := newSyncer(*cfg, log.NewNopLogger(), connSnapshot, connQuery, stateProvider, "") connQuery.On("InfoSync", proxy.RequestInfo).Return(tc.response, tc.err) - version, err := syncer.verifyApp(s) + err := syncer.verifyApp(s, appVersion) unwrapped := errors.Unwrap(err) if unwrapped != nil { err = unwrapped } - assert.Equal(t, tc.expectErr, err) - if err == nil { - assert.Equal(t, tc.response.AppVersion, version) - } + 
require.Equal(t, tc.expectErr, err) }) } } diff --git a/test/e2e/app/app.go b/test/e2e/app/app.go index 48851e1d6..ecb6f0ee9 100644 --- a/test/e2e/app/app.go +++ b/test/e2e/app/app.go @@ -18,9 +18,12 @@ import ( "github.com/line/ostracon/version" ) +const appVersion = 1 + // Application is an ABCI application for use by end-to-end tests. It is a // simple key/value store for strings, storing data in memory and persisting // to disk as JSON, taking state sync snapshots if requested. + type Application struct { abci.BaseApplication logger log.Logger @@ -102,7 +105,7 @@ func NewApplication(cfg *Config) (*Application, error) { func (app *Application) Info(req abci.RequestInfo) abci.ResponseInfo { return abci.ResponseInfo{ Version: version.ABCIVersion, - AppVersion: 1, + AppVersion: appVersion, LastBlockHeight: int64(app.state.Height), LastBlockAppHash: app.state.Hash, } diff --git a/test/e2e/runner/setup.go b/test/e2e/runner/setup.go index 739aafc26..8f5a2f6a5 100644 --- a/test/e2e/runner/setup.go +++ b/test/e2e/runner/setup.go @@ -203,6 +203,8 @@ func MakeGenesis(testnet *e2e.Testnet) (types.GenesisDoc, error) { ConsensusParams: types.DefaultConsensusParams(), InitialHeight: testnet.InitialHeight, } + // set the app version to 1 + genesis.ConsensusParams.Version.AppVersion = 1 for validator, power := range testnet.Validators { genesis.Validators = append(genesis.Validators, types.GenesisValidator{ Name: validator.Name, From 829fe6bcaf0cdb19f7e2ed783ab48909ce50cdba Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 18:15:01 +0900 Subject: [PATCH 10/13] docs: point docs/master to the same content as the latest release (backport #7980) (#7997) * Remove master from versions and copy it from the latest. 
(#7980) (cherry picked from commit f939f962b19d87e7f23ec912e388ac9165fb1ff4) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 56db7b355..98c62d4d2 100644 --- a/Makefile +++ b/Makefile @@ -242,7 +242,7 @@ DESTINATION = ./index.html.md BRANCH := $(shell git branch --show-current) BRANCH_URI := $(shell git branch --show-current | sed 's/[\#]/%23/g') build-docs: - cd docs && \ + @cd docs && \ npm install && \ VUEPRESS_BASE="/$(BRANCH_URI)/" npm run build && \ mkdir -p ~/output/$(BRANCH) && \ From e0d23849a57e10f6f44b50e4d8291395afb21ffc Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 18:16:05 +0900 Subject: [PATCH 11/13] Add manual e2e workflow to v0.34.x. (#8005) Co-authored-by: M. J. Fromberger --- .github/workflows/e2e-manual.yml | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/e2e-manual.yml diff --git a/.github/workflows/e2e-manual.yml b/.github/workflows/e2e-manual.yml new file mode 100644 index 000000000..3881aa7fc --- /dev/null +++ b/.github/workflows/e2e-manual.yml @@ -0,0 +1,35 @@ +# Manually run randomly generated E2E testnets (as nightly). +name: e2e-manual +on: + workflow_dispatch: + +jobs: + e2e-nightly-test: + # Run parallel jobs for the listed testnet groups (must match the + # ./build/generator -g flag) + strategy: + fail-fast: false + matrix: + group: ['00', '01', '02', '03'] + runs-on: ubuntu-latest + timeout-minutes: 60 + steps: + - uses: actions/setup-go@v2 + with: + go-version: '1.17' + + - uses: actions/checkout@v2.4.0 + + - name: Build + working-directory: test/e2e + # Run make jobs in parallel, since we can't run steps in parallel. 
+ run: make -j2 docker generator runner tests + + - name: Generate testnets + working-directory: test/e2e + # When changing -g, also change the matrix groups above + run: ./build/generator -g 4 -d networks/nightly/ + + - name: Run ${{ matrix.p2p }} p2p testnets + working-directory: test/e2e + run: ./run-multiple.sh networks/nightly/*-group${{ matrix.group }}-*.toml From 1a06275db22fb3f7e605a35f51ce094cc79c2c1d Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 16:14:08 +0900 Subject: [PATCH 12/13] Shorten the go-test timeout --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index d334986bf..f1ccd1dbb 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -79,7 +79,7 @@ jobs: path: crypto/vrf/internal/vrf/sodium - name: test & coverage report creation run: | - cat pkgs.txt.part.${{ matrix.part }} | xargs go test -mod=readonly -timeout 10m -race -coverprofile=${{ matrix.part }}profile.out -covermode=atomic -tags ${{ matrix.vrf }} + cat pkgs.txt.part.${{ matrix.part }} | xargs go test -mod=readonly -timeout 7m -race -coverprofile=${{ matrix.part }}profile.out -covermode=atomic -tags ${{ matrix.vrf }} if: env.GIT_DIFF - uses: actions/upload-artifact@v2 with: From 00703b5d054f52b84fa5a16c28d6565bc3b1eaf9 Mon Sep 17 00:00:00 2001 From: tnasu Date: Mon, 28 Feb 2022 17:34:30 +0900 Subject: [PATCH 13/13] Change tests with high loop counts to benchmark tests to improve test execution time --- types/voter_set_test.go | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/types/voter_set_test.go b/types/voter_set_test.go index 9301260cb..144ef8fc8 100644 --- a/types/voter_set_test.go +++ b/types/voter_set_test.go @@ -391,18 +391,21 @@ The result when we set LoopCount to 10000 << min power=100, max power=100000000, actual average voters=10, max voters=10 >>
largest gap: 0.076536 << min power=100, max power=100000000, actual average voters=20, max voters=20 >> largest gap: 0.076547 << min power=100, max power=100000000, actual average voters=29, max voters=29 >> largest gap: 0.147867 + +for testing: +go test -bench BenchmarkSelectVoterReasonableStakingPower github.com/line/ostracon/types -run ^$ -count 1 -benchmem -v */ -func TestSelectVoterReasonableStakingPower(t *testing.T) { +func BenchmarkSelectVoterReasonableStakingPower(b *testing.B) { // Raise LoopCount to get smaller gap over 10000. But large LoopCount takes a lot of time const LoopCount = 100 for minMaxRate := 1; minMaxRate <= 1000000; minMaxRate *= 100 { - findLargestStakingPowerGap(t, LoopCount, minMaxRate, 10) - findLargestStakingPowerGap(t, LoopCount, minMaxRate, 20) - findLargestStakingPowerGap(t, LoopCount, minMaxRate, 29) + findLargestStakingPowerGap(b, LoopCount, minMaxRate, 10) + findLargestStakingPowerGap(b, LoopCount, minMaxRate, 20) + findLargestStakingPowerGap(b, LoopCount, minMaxRate, 29) } } -func findLargestStakingPowerGap(t *testing.T, loopCount int, minMaxRate int, maxVoters int) { +func findLargestStakingPowerGap(b *testing.B, loopCount int, minMaxRate int, maxVoters int) { valSet, privMap := randValidatorSetWithMinMax(PrivKeyEd25519, 30, 100, 100*int64(minMaxRate)) genDoc := &GenesisDoc{ GenesisTime: tmtime.Now(), @@ -432,7 +435,7 @@ func findLargestStakingPowerGap(t *testing.T, loopCount int, minMaxRate int, max largestGap = math.Abs(float64(val.StakingPower-acc)) / float64(val.StakingPower) } } - t.Logf("<< min power=100, max power=%d, actual average voters=%d, max voters=%d >> largest gap: %f", + b.Logf("<< min power=100, max power=%d, actual average voters=%d, max voters=%d >> largest gap: %f", 100*minMaxRate, totalVoters/loopCount, maxVoters, largestGap) } @@ -442,17 +445,20 @@ func findLargestStakingPowerGap(t *testing.T, loopCount int, minMaxRate int, max MaxSamplingLoopTry. 
If MaxSamplingLoopTry is very large then actual elected voters is up to MaxVoters, but large MaxSamplingLoopTry takes too much time. + +for testing: +go test -bench BenchmarkSelectVoterMaxVarious github.com/line/ostracon/types -run ^$ -count 1 -benchmem -v */ -func TestSelectVoterMaxVarious(t *testing.T) { +func BenchmarkSelectVoterMaxVarious(b *testing.B) { hash := 0 for minMaxRate := 1; minMaxRate <= 100000000; minMaxRate *= 10000 { - t.Logf("<<< min: 100, max: %d >>>", 100*minMaxRate) + b.Logf("<<< min: 100, max: %d >>>", 100*minMaxRate) for validators := 16; validators <= 256; validators *= 4 { for voters := 1; voters <= validators; voters += 10 { valSet, _ := randValidatorSetWithMinMax(PrivKeyEd25519, validators, 100, 100*int64(minMaxRate)) voterSet := SelectVoter(valSet, []byte{byte(hash)}, &VoterParams{int32(voters), 20}) if voterSet.Size() < voters { - t.Logf("Cannot elect voters up to MaxVoters: validators=%d, MaxVoters=%d, actual voters=%d", + b.Logf("Cannot elect voters up to MaxVoters: validators=%d, MaxVoters=%d, actual voters=%d", validators, voters, voterSet.Size()) break } @@ -977,7 +983,11 @@ func TestMyMy(t *testing.T) { t.Logf("a=%v, b=%v", a, b) } -func TestElectVotersNonDup(t *testing.T) { +/** +for testing: +go test -bench BenchmarkElectVotersNonDup github.com/line/ostracon/types -run ^$ -count 1 -benchmem -v +*/ +func BenchmarkElectVotersNonDup(b *testing.B) { for n := 100; n <= 1000; n += 100 { rand.Seed(int64(n)) validators := newValidatorSet(n, func(i int) int64 { @@ -992,7 +1002,7 @@ func TestElectVotersNonDup(t *testing.T) { } break } - assert.True(t, isByzantineTolerable(winners, 30)) + assert.True(b, isByzantineTolerable(winners, 30)) } } @@ -1117,7 +1127,11 @@ func TestElectVotersNonDupWithOverflow(t *testing.T) { electVotersNonDup(validators.Validators, 0, 30, 0) } -func TestElectVotersNonDupDistribution(t *testing.T) { +/** +for testing: +go test -bench BenchmarkElectVotersNonDupDistribution github.com/line/ostracon/types -run ^$ 
-count 1 -benchmem -v +*/ +func BenchmarkElectVotersNonDupDistribution(b *testing.B) { validators := newValidatorSet(100, func(i int) int64 { return 1000 }) @@ -1133,7 +1147,7 @@ func TestElectVotersNonDupDistribution(t *testing.T) { } for _, v := range scores { - assert.True(t, v >= 900 && v <= 1100) + assert.True(b, v >= 900 && v <= 1100) } }