diff --git a/consensus/state.go b/consensus/state.go index 0d62fa5d3..babe326dc 100644 --- a/consensus/state.go +++ b/consensus/state.go @@ -17,6 +17,7 @@ import ( cfg "github.com/tendermint/tendermint/config" cstypes "github.com/tendermint/tendermint/consensus/types" + "github.com/tendermint/tendermint/crypto" tmevents "github.com/tendermint/tendermint/libs/events" "github.com/tendermint/tendermint/p2p" sm "github.com/tendermint/tendermint/state" @@ -31,6 +32,8 @@ var ( ErrInvalidProposalPOLRound = errors.New("Error invalid proposal POL round") ErrAddingVote = errors.New("Error adding vote") ErrVoteHeightMismatch = errors.New("Error vote height mismatch") + + errPubKeyIsNotSet = errors.New("Pubkey is not set. Look for \"Can't get private validator pubkey\" errors") ) //----------------------------------------------------------------------------- @@ -95,6 +98,9 @@ type ConsensusState struct { mtx sync.RWMutex cstypes.RoundState state sm.State // State until height-1. + // privValidator pubkey, memoized for the duration of one block + // to avoid extra requests to HSM + privValidatorPubKey crypto.PubKey // state changes may be triggered by: msgs from peers, // msgs from ourself, or by timeouts @@ -251,11 +257,17 @@ func (cs *ConsensusState) GetValidators() (int64, []*types.Validator) { return cs.state.LastBlockHeight, cs.state.Validators.Copy().Validators } -// SetPrivValidator sets the private validator account for signing votes. +// SetPrivValidator sets the private validator account for signing votes. It +// immediately requests pubkey and caches it. func (cs *ConsensusState) SetPrivValidator(priv types.PrivValidator) { cs.mtx.Lock() + defer cs.mtx.Unlock() + cs.privValidator = priv - cs.mtx.Unlock() + + if err := cs.updatePrivValidatorPubKey(); err != nil { + cs.Logger.Error("Can't get private validator pubkey", "err", err) + } } // SetTimeoutTicker sets the local timer. It may be useful to overwrite for testing. @@ -888,8 +900,17 @@ func (cs *ConsensusState) enterPropose(height int64, round int) { return } + logger.Debug("This node is a validator") + + if cs.privValidatorPubKey == nil { + // If this node is a validator & proposer in the current round, it will + // miss the opportunity to create a block. + logger.Error(fmt.Sprintf("enterPropose: %v", errPubKeyIsNotSet)) + return + } + address := cs.privValidatorPubKey.Address() + // if not a validator, we're done - address := cs.privValidator.GetPubKey().Address() if !cs.Validators.HasAddress(address) { logger.Debug("This node is not a validator", "addr", address, "vals", cs.Validators) return @@ -966,7 +987,12 @@ func (cs *ConsensusState) isProposalComplete() bool { // is returned for convenience so we can log the proposal block. // Returns nil block upon error. // NOTE: keep it side-effect free for clarity. +// CONTRACT: cs.privValidator is not nil. func (cs *ConsensusState) createProposalBlock() (block *types.Block, blockParts *types.PartSet) { + if cs.privValidator == nil { + panic("entered createProposalBlock with privValidator being nil") + } + var commit *types.Commit switch { case cs.Height == 1: @@ -976,13 +1002,19 @@ func (cs *ConsensusState) createProposalBlock() (block *types.Block, blockParts case cs.LastCommit.HasTwoThirdsMajority(): // Make the commit from LastCommit commit = cs.LastCommit.MakeCommit() - default: - // This shouldn't happen. - cs.Logger.Error("enterPropose: Cannot propose anything: No commit for the previous block.") + default: // This shouldn't happen. + cs.Logger.Error("enterPropose: Cannot propose anything: No commit for the previous block") return } - proposerAddr := cs.privValidator.GetPubKey().Address() + if cs.privValidatorPubKey == nil { + // If this node is a validator & proposer in the current round, it will + // miss the opportunity to create a block. + cs.Logger.Error(fmt.Sprintf("enterPropose: %v", errPubKeyIsNotSet)) + return + } + proposerAddr := cs.privValidatorPubKey.Address() + return cs.blockExec.CreateProposalBlock(cs.Height, cs.state, commit, proposerAddr) } @@ -1359,6 +1391,11 @@ func (cs *ConsensusState) finalizeCommit(height int64) { fail.Fail() // XXX + // Private validator might have changed it's key pair => refetch pubkey. + if err := cs.updatePrivValidatorPubKey(); err != nil { + cs.Logger.Error("Can't get private validator pubkey", "err", err) + } + // cs.StartTime is already set. // Schedule Round0 to start soon. cs.scheduleRound0(&cs.RoundState) @@ -1372,8 +1409,11 @@ func (cs *ConsensusState) finalizeCommit(height int64) { func (cs *ConsensusState) recordMetrics(height int64, block *types.Block) { cs.metrics.Validators.Set(float64(cs.Validators.Size())) cs.metrics.ValidatorsPower.Set(float64(cs.Validators.TotalVotingPower())) - missingValidators := 0 - missingValidatorsPower := int64(0) + var ( + missingValidators = 0 + missingValidatorsPower int64 + ) + for i, val := range cs.Validators.Validators { var vote *types.CommitSig if i < len(block.LastCommit.Precommits) { @@ -1526,9 +1566,16 @@ func (cs *ConsensusState) tryAddVote(vote *types.Vote, peerID p2p.ID) (bool, err if err == ErrVoteHeightMismatch { return added, err } else if voteErr, ok := err.(*types.ErrVoteConflictingVotes); ok { + if cs.privValidatorPubKey == nil { + return false, errPubKeyIsNotSet + } addr := cs.privValidator.GetPubKey().Address() if bytes.Equal(vote.ValidatorAddress, addr) { - cs.Logger.Error("Found conflicting vote from ourselves. Did you unsafe_reset a validator?", "height", vote.Height, "round", vote.Round, "type", vote.Type) + cs.Logger.Error( + "Found conflicting vote from ourselves. Did you unsafe_reset a validator?", + "height", vote.Height, + "round", vote.Round, + "type", vote.Type) return added, err } cs.evpool.AddEvidence(voteErr.DuplicateVoteEvidence) @@ -1704,7 +1751,10 @@ func (cs *ConsensusState) signVote(type_ types.SignedMsgType, hash []byte, heade // Flush the WAL. Otherwise, we may not recompute the same vote to sign, and the privValidator will refuse to sign anything. cs.wal.FlushAndSync() - addr := cs.privValidator.GetPubKey().Address() + if cs.privValidatorPubKey == nil { + return nil, errPubKeyIsNotSet + } + addr := cs.privValidatorPubKey.Address() valIndex, _ := cs.Validators.GetByAddress(addr) vote := &types.Vote{ @@ -1739,8 +1789,18 @@ func (cs *ConsensusState) voteTime() time.Time { // sign the vote and publish on internalMsgQueue func (cs *ConsensusState) signAddVote(type_ types.SignedMsgType, hash []byte, header types.PartSetHeader) *types.Vote { - // if we don't have a key or we're not in the validator set, do nothing - if cs.privValidator == nil || !cs.Validators.HasAddress(cs.privValidator.GetPubKey().Address()) { + if cs.privValidator == nil { // the node does not have a key + return nil + } + + if cs.privValidatorPubKey == nil { + // Vote won't be signed, but it's not critical. + cs.Logger.Error(fmt.Sprintf("signAddVote: %v", errPubKeyIsNotSet)) + return nil + } + + // If the node not in the validator set, do nothing. + if !cs.Validators.HasAddress(cs.privValidatorPubKey.Address()) { return nil } vote, err := cs.signVote(type_, hash, header) @@ -1755,6 +1815,18 @@ func (cs *ConsensusState) signAddVote(type_ types.SignedMsgType, hash []byte, he return nil } +// updatePrivValidatorPubKey get's the private validator public key and +// memoizes it. This func returns an error if the private validator is not +// responding or responds with an error. +func (cs *ConsensusState) updatePrivValidatorPubKey() error { + if cs.privValidator == nil { + return nil + } + + cs.privValidatorPubKey = cs.privValidator.GetPubKey() + return nil +} + //--------------------------------------------------------- func CompareHRS(h1 int64, r1 int, s1 cstypes.RoundStepType, h2 int64, r2 int, s2 cstypes.RoundStepType) int { diff --git a/node/node.go b/node/node.go index 64f0d4a2f..722fc467c 100644 --- a/node/node.go +++ b/node/node.go @@ -1274,7 +1274,19 @@ func createAndStartPrivValidatorSocketClient( return nil, errors.Wrap(err, "failed to start private validator") } - return pvsc, nil + // try to get a pubkey from private validate first time + pubKey := pvsc.GetPubKey() + if pubKey == nil { + return nil, errors.New("could not retrieve public key from private validator") + } + + const ( + retries = 50 // 50 * 100ms = 5s total + timeout = 100 * time.Millisecond + ) + pvscWithRetries := privval.NewRetrySignerClient(pvsc, retries, timeout) + + return pvscWithRetries, nil } // splitAndTrimEmpty slices s into all subslices separated by sep and returns a diff --git a/node/node_test.go b/node/node_test.go index 77e970d28..663ad24b7 100644 --- a/node/node_test.go +++ b/node/node_test.go @@ -158,7 +158,7 @@ func TestNodeSetPrivValTCP(t *testing.T) { n, err := DefaultNewNode(config, log.TestingLogger()) require.NoError(t, err) - assert.IsType(t, &privval.SignerClient{}, n.PrivValidator()) + assert.IsType(t, &privval.RetrySignerClient{}, n.PrivValidator()) } // address without a protocol must result in error @@ -202,7 +202,7 @@ func TestNodeSetPrivValIPC(t *testing.T) { n, err := DefaultNewNode(config, log.TestingLogger()) require.NoError(t, err) - assert.IsType(t, &privval.SignerClient{}, n.PrivValidator()) + assert.IsType(t, &privval.RetrySignerClient{}, n.PrivValidator()) } // testFreeAddr claims a free port so we don't block on listener being ready. diff --git a/privval/errors.go b/privval/errors.go index 9f151f11d..297d5dca2 100644 --- a/privval/errors.go +++ b/privval/errors.go @@ -1,9 +1,11 @@ package privval import ( + "errors" "fmt" ) +// EndpointTimeoutError occurs when endpoint times out. type EndpointTimeoutError struct{} // Implement the net.Error interface. @@ -13,15 +15,15 @@ func (e EndpointTimeoutError) Temporary() bool { return true } // Socket errors. var ( - ErrUnexpectedResponse = fmt.Errorf("received unexpected response") - ErrNoConnection = fmt.Errorf("endpoint is not connected") ErrConnectionTimeout = EndpointTimeoutError{} - - ErrReadTimeout = fmt.Errorf("endpoint read timed out") - ErrWriteTimeout = fmt.Errorf("endpoint write timed out") + ErrNoConnection = errors.New("endpoint is not connected") + ErrReadTimeout = errors.New("endpoint read timed out") + ErrUnexpectedResponse = errors.New("empty response") + ErrWriteTimeout = errors.New("endpoint write timed out") ) -// RemoteSignerError allows (remote) validators to include meaningful error descriptions in their reply. +// RemoteSignerError allows (remote) validators to include meaningful error +// descriptions in their reply. type RemoteSignerError struct { // TODO(ismail): create an enum of known errors Code int diff --git a/privval/retry_signer_client.go b/privval/retry_signer_client.go new file mode 100644 index 000000000..f6c10a521 --- /dev/null +++ b/privval/retry_signer_client.go @@ -0,0 +1,85 @@ +package privval + +import ( + "errors" + "time" + + "github.com/tendermint/tendermint/crypto" + "github.com/tendermint/tendermint/types" +) + +// RetrySignerClient wraps SignerClient adding retry for each operation (except +// Ping) w/ a timeout. +type RetrySignerClient struct { + next *SignerClient + retries int + timeout time.Duration +} + +// NewRetrySignerClient returns RetrySignerClient. If +retries+ is 0, the +// client will be retrying each operation indefinitely. +func NewRetrySignerClient(sc *SignerClient, retries int, timeout time.Duration) *RetrySignerClient { + return &RetrySignerClient{sc, retries, timeout} +} + +var _ types.PrivValidator = (*RetrySignerClient)(nil) + +func (sc *RetrySignerClient) Close() error { + return sc.next.Close() +} + +func (sc *RetrySignerClient) IsConnected() bool { + return sc.next.IsConnected() +} + +func (sc *RetrySignerClient) WaitForConnection(maxWait time.Duration) error { + return sc.next.WaitForConnection(maxWait) +} + +//-------------------------------------------------------- +// Implement PrivValidator + +func (sc *RetrySignerClient) Ping() error { + return sc.next.Ping() +} + +func (sc *RetrySignerClient) GetPubKey() crypto.PubKey { + for i := 0; i < sc.retries || sc.retries == 0; i++ { + pk := sc.next.GetPubKey() + if pk != nil { + return pk + } + time.Sleep(sc.timeout) + } + return nil +} + +func (sc *RetrySignerClient) SignVote(chainID string, vote *types.Vote) error { + for i := 0; i < sc.retries || sc.retries == 0; i++ { + err := sc.next.SignVote(chainID, vote) + if err == nil { + return nil + } + // If remote signer errors, we don't retry. + if _, ok := err.(*RemoteSignerError); ok { + return err + } + time.Sleep(sc.timeout) + } + return errors.New("exhausted all attempts to sign vote") +} + +func (sc *RetrySignerClient) SignProposal(chainID string, proposal *types.Proposal) error { + for i := 0; i < sc.retries || sc.retries == 0; i++ { + err := sc.next.SignProposal(chainID, proposal) + if err == nil { + return nil + } + // If remote signer errors, we don't retry. + if _, ok := err.(*RemoteSignerError); ok { + return err + } + time.Sleep(sc.timeout) + } + return errors.New("exhausted all attempts to sign proposal") +} diff --git a/privval/signer_client.go b/privval/signer_client.go index 0885ee4aa..cbdb6ed26 100644 --- a/privval/signer_client.go +++ b/privval/signer_client.go @@ -50,7 +50,6 @@ func (sc *SignerClient) WaitForConnection(maxWait time.Duration) error { // Ping sends a ping request to the remote signer func (sc *SignerClient) Ping() error { response, err := sc.endpoint.SendRequest(&PingRequest{}) - if err != nil { sc.endpoint.Logger.Error("SignerClient::Ping", "err", err) return nil @@ -58,7 +57,6 @@ func (sc *SignerClient) Ping() error { _, ok := response.(*PingResponse) if !ok { - sc.endpoint.Logger.Error("SignerClient::Ping", "err", "response != PingResponse") return err } @@ -91,16 +89,13 @@ func (sc *SignerClient) GetPubKey() crypto.PubKey { func (sc *SignerClient) SignVote(chainID string, vote *types.Vote) error { response, err := sc.endpoint.SendRequest(&SignVoteRequest{Vote: vote}) if err != nil { - sc.endpoint.Logger.Error("SignerClient::SignVote", "err", err) return err } resp, ok := response.(*SignedVoteResponse) if !ok { - sc.endpoint.Logger.Error("SignerClient::GetPubKey", "err", "response != SignedVoteResponse") return ErrUnexpectedResponse } - if resp.Error != nil { return resp.Error } @@ -113,13 +108,11 @@ func (sc *SignerClient) SignVote(chainID string, vote *types.Vote) error { func (sc *SignerClient) SignProposal(chainID string, proposal *types.Proposal) error { response, err := sc.endpoint.SendRequest(&SignProposalRequest{Proposal: proposal}) if err != nil { - sc.endpoint.Logger.Error("SignerClient::SignProposal", "err", err) return err } resp, ok := response.(*SignedProposalResponse) if !ok { - sc.endpoint.Logger.Error("SignerClient::SignProposal", "err", "response != SignedProposalResponse") return ErrUnexpectedResponse } if resp.Error != nil { diff --git a/privval/signer_client_test.go b/privval/signer_client_test.go index 3d7cfb3e0..e44577170 100644 --- a/privval/signer_client_test.go +++ b/privval/signer_client_test.go @@ -250,8 +250,7 @@ func TestSignerUnexpectedResponse(t *testing.T) { ts := time.Now() want := &types.Vote{Timestamp: ts, Type: types.PrecommitType} - e := tc.signerClient.SignVote(tc.chainID, want) - assert.EqualError(t, e, "received unexpected response") + assert.EqualError(t, e, "empty response") } }