Skip to content

Commit

Permalink
chain: retry on db failure (#478)
Browse files Browse the repository at this point in the history
* init commit

* make: add two-chains target

* chain: patch MustGetLatestLightHeight

* lint++

* lint++

* update scripts

* lint++

* lint++

* update script

* github actions++

* github actions++

* github actions++

* github actions++

* github actions++

* github actions++

* github actions++

* github actions++

* github actions++

* github actions++

* github actions++

* github actions++

* github actions++

* github actions++
  • Loading branch information
alexanderbez authored Mar 29, 2021
1 parent 76eb658 commit 56c88b6
Show file tree
Hide file tree
Showing 17 changed files with 210 additions and 126 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ release.tar.gz
nchainz/
.idea/
.csv
chain-code/
chain-code/
two-chains/ibc-*
two-chains/.relayer
two-chains/*.log
7 changes: 3 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@ COPY . .

# Update and install needed deps prior to installing the binary.
RUN apk update && \
apk --no-cache add make git && \
make install
apk --no-cache add make git && \
make install

FROM alpine:latest

ENV RELAYER /relayer

RUN addgroup rlyuser && \
adduser -S -G rlyuser rlyuser -h "$RELAYER"
RUN addgroup rlyuser && adduser -S -G rlyuser rlyuser -h "$RELAYER"

USER rlyuser

Expand Down
13 changes: 11 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ install: go.sum
###############################################################################
# Tests / CI
###############################################################################

two-chains:
@docker-compose -f ./two-chains/docker-compose.yaml down
@rm -fr ./two-chains/ibc-* ./two-chains/.relayer ./two-chains/rly.log
@docker-compose -f ./two-chains/docker-compose.yaml up -d
@while ! curl localhost:26657 &> /dev/null; do sleep 1; done
@while ! curl localhost:26667 &> /dev/null; do sleep 1; done
@cd ./two-chains && sh relayer-setup && cd ..

test:
@TEST_DEBUG=true go test -mod=readonly -v ./test/...

Expand All @@ -66,8 +75,6 @@ lint:
@find . -name '*.go' -type f -not -path "*.git*" | xargs gofmt -d -s
@go mod verify

.PHONY: install build lint coverage clean

###############################################################################
# Chain Code Downloads
###############################################################################
Expand Down Expand Up @@ -104,3 +111,5 @@ check-swagger:

update-swagger-docs: check-swagger
swagger generate spec -o ./docs/swagger-ui/swagger.yaml

.PHONY: two-chains test install build lint coverage clean
22 changes: 0 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ wanting to build their [IBC](https://ibcprotocol.org/)-compliant relayer.
- [Compatibility Table](#compatibility-table)
- [Testnet](#testnet)
- [Demo](#demo)
- [Setting up Developer Environment](#setting-up-developer-environment)
- [Security Notice](#security-notice)
- [Code of Conduct](#code-of-conduct)

Expand Down Expand Up @@ -263,27 +262,6 @@ $ rly q bal ibc-1
# You can change the amount of fees you are paying on each chain in the configuration.
```

## Setting up Developer Environment

Working with the relayer can frequently involve working with local development
branches of your desired applications/networks, e.g. `gaia`, `akash`, in addition
to `cosmos-sdk` and the `relayer`.

To set up your environment to point at the local versions of the code and reduce
the amount of time in your read-eval-print loops, try the following:

1. Set `replace github.com/cosmos/cosmos-sdk => /path/to/local/github.com/cosmos/cosmos-sdk`
at the end of the `go.mod` files for the `relayer` and your network/application,
e.g. `gaia`. This will force building from the local version of the `cosmos-sdk`
when running the `./dev-env` script.
2. After `./dev-env` has run, you can use `go run main.go` for any relayer
commands you are working on. This allows you to make changes and immediately test
them, as long as there are no server-side changes.
3. If you make changes in `cosmos-sdk` that need to be reflected server-side,
be sure to re-run `./two-chainz`.
4. If you need to work off of a `gaia` branch other than `master`, change the
branch name at the top of the `./two-chainz` script.

## Security Notice

If you would like to report a security critical bug related to the relayer repo,
Expand Down
11 changes: 7 additions & 4 deletions cmd/tx.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,13 @@ Most of these commands take a [path] argument. Make sure:

func sendCmd() *cobra.Command {
cmd := &cobra.Command{
Use: "send [chain-id] [from-key] [to-address] [amount]",
Short: "send funds to a different address on the same chain",
Args: cobra.ExactArgs(4),
Example: strings.TrimSpace(fmt.Sprintf(`$ %s tx send testkey cosmos10yft4nc8tacpngwlpyq3u4t88y7qzc9xv0q4y8 10000uatom`, appName)),
Use: "send [chain-id] [from-key] [to-address] [amount]",
Short: "send funds to a different address on the same chain",
Args: cobra.ExactArgs(4),
Example: strings.TrimSpace(fmt.Sprintf(`
$ %s tx send testkey cosmos10yft4nc8tacpngwlpyq3u4t88y7qzc9xv0q4y8 10000uatom`,
appName,
)),
RunE: func(cmd *cobra.Command, args []string) error {
c, err := config.Chains.Get(args[0])
if err != nil {
Expand Down
6 changes: 0 additions & 6 deletions configs/demo/paths/demo.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
{
"src": {
"chain-id": "ibc-0",
"client-id": "",
"connection-id": "",
"channel-id": "",
"port-id": "transfer",
"order": "unordered",
"version": "ics20-1"
},
"dst": {
"chain-id": "ibc-1",
"client-id": "",
"connection-id": "",
"channel-id": "",
"port-id": "transfer",
"order": "unordered",
"version": "ics20-1"
Expand Down
29 changes: 0 additions & 29 deletions dev-env

This file was deleted.

30 changes: 0 additions & 30 deletions docker-compose.yaml

This file was deleted.

15 changes: 0 additions & 15 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,18 +1,3 @@
/*
Copyright © 2020 Jack Zampolin [email protected]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main

import "github.com/cosmos/relayer/cmd"
Expand Down
41 changes: 29 additions & 12 deletions relayer/tm-light-client.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ var (
// a lock to prevent two processes from trying to access the light client
// database at the same time resulting in errors and panics.
lightDBMutex sync.Mutex

// ErrDatabase defines a sentinel database general error type.
ErrDatabase = errors.New("database failure")
)

func lightError(err error) error { return fmt.Errorf("light client: %w", err) }
Expand Down Expand Up @@ -175,28 +178,29 @@ func (c *Chain) LightClient(db dbm.DB) (*light.Client, error) {
)
}

// NewLightDB returns a new instance of the lightclient database connection
// CONTRACT: must close the database connection when done with it (defer df())
func (c *Chain) NewLightDB() (db *dbm.GoLevelDB, df func(), err error) {
// a lock is used to prevent error messages or panics from two processes
// trying to simultanenously use the light client
// NewLightDB returns a new instance of the lightclient database connection. The
// caller MUST close the database connection through a deferred execution of the
// returned cleanup function.
func (c *Chain) NewLightDB() (db *dbm.GoLevelDB, cleanup func(), err error) {
// XXX: A lock is used to prevent error messages or panics from two processes
// trying to simultaneously use the light client.
lightDBMutex.Lock()

db, err = dbm.NewGoLevelDB(c.ChainID, lightDir(c.HomePath))
if err != nil {
lightDBMutex.Unlock()
return nil, nil, fmt.Errorf("can't open light client database: %w", err)
return nil, nil, fmt.Errorf("%s: %w", err, ErrDatabase)
}

df = func() {
cleanup = func() {
err := db.Close()
lightDBMutex.Unlock()
if err != nil {
panic(err)
panic(fmt.Sprintf("failed to close light client database: %s", err))
}
}

return
return db, cleanup, nil
}

// DeleteLightDB removes the light client database on disk, forcing re-initialization
Expand Down Expand Up @@ -269,12 +273,25 @@ func (c *Chain) GetLatestLightHeight() (int64, error) {
return client.LastTrustedHeight()
}

// MustGetLatestLightHeight returns the latest height of the light client
// and panics if an error occurs.
// MustGetLatestLightHeight returns the latest height of the light client. If
// an error occurs due to a database failure, we keep trying with a delayed
// re-attempt. Otherwise, we panic.
func (c *Chain) MustGetLatestLightHeight() uint64 {
height, err := c.GetLatestLightHeight()
if err != nil {
panic(err)
if errors.Is(err, ErrDatabase) {
// XXX: Sleep and try again if the database is unavailable. This can easily
// happen if two distinct resources try to access the database at the same
// time. To avoid causing a corrupted or lost packet, we keep trying as to
// not halt the relayer.
//
// ref: https://github.com/cosmos/relayer/issues/444
c.logger.Error("failed to get latest height due to a database failure; trying again...", "err", err)
time.Sleep(time.Second)
c.MustGetLatestLightHeight()
} else {
panic(err)
}
}

return uint64(height)
Expand Down
2 changes: 1 addition & 1 deletion scripts/one-chain
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,4 @@ else
fi

# Start the gaia
redirect $BINARY --home $CHAINDIR/$CHAINID start --pruning=nothing --grpc.address="0.0.0.0:$GRPCPORT" > $CHAINDIR/$CHAINID.log &
redirect $BINARY --home $CHAINDIR/$CHAINID start --pruning=nothing --grpc.address="0.0.0.0:$GRPCPORT" > $CHAINDIR/$CHAINID.log 2>&1 &
9 changes: 9 additions & 0 deletions two-chains/chains/ibc-0.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"key": "ibc-0-relayer-key",
"chain-id": "ibc-0",
"rpc-addr": "http://localhost:26657",
"account-prefix": "cosmos",
"gas-adjustment": 1.5,
"gas-prices": "0.025uatom",
"trusting-period": "336h"
}
9 changes: 9 additions & 0 deletions two-chains/chains/ibc-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"key": "ibc-1-relayer-key",
"chain-id": "ibc-1",
"rpc-addr": "http://localhost:26667",
"account-prefix": "cosmos",
"gas-adjustment": 1.5,
"gas-prices": "0.025uatom",
"trusting-period": "336h"
}
38 changes: 38 additions & 0 deletions two-chains/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
version: "3.9"
services:
ibc-0:
image: "tendermint/gaia:v4.2.0"
ports:
- "26656-26657:26656-26657"
- "1317:1317"
- "9090:9090"
volumes:
- ./ibc-0:/gaia/.gaia:Z
command: >
sh -c "gaiad --chain-id=ibc-0 init ibc-0
&& gaiad keys add validator --keyring-backend='test' --output json > $$HOME/.gaia/validator_seed.json 2> /dev/null
&& gaiad keys add user --keyring-backend='test' --output json > $$HOME/.gaia/key_seed.json 2> /dev/null
&& gaiad add-genesis-account $$(gaiad keys --keyring-backend='test' show user -a) 100000000000stake,100000000000uatom
&& gaiad add-genesis-account $$(gaiad keys --keyring-backend='test' show validator -a) 100000000000stake,100000000000uatom
&& gaiad gentx validator 100000000000stake --keyring-backend='test' --chain-id ibc-0
&& gaiad collect-gentxs
&& sed -i'.bak' -e 's#tcp://127.0.0.1:26657#tcp://0.0.0.0:26657#g' $$HOME/.gaia/config/config.toml
&& gaiad start --pruning=nothing"
ibc-1:
image: "tendermint/gaia:v4.2.0"
ports:
- "26666-26667:26656-26657"
- "1318:1317"
- "9091:9090"
volumes:
- ./ibc-1:/gaia/.gaia:Z
command: >
sh -c "gaiad --chain-id=ibc-1 init ibc-1
&& gaiad keys add validator --keyring-backend='test' --output json > $$HOME/.gaia/validator_seed.json 2> /dev/null
&& gaiad keys add user --keyring-backend='test' --output json > $$HOME/.gaia/key_seed.json 2> /dev/null
&& gaiad add-genesis-account $$(gaiad keys --keyring-backend='test' show user -a) 100000000000stake,100000000000uatom
&& gaiad add-genesis-account $$(gaiad keys --keyring-backend='test' show validator -a) 100000000000stake,100000000000uatom
&& gaiad gentx validator 100000000000stake --keyring-backend='test' --chain-id ibc-1
&& gaiad collect-gentxs
&& sed -i'.bak' -e 's#tcp://127.0.0.1:26657#tcp://0.0.0.0:26657#g' $$HOME/.gaia/config/config.toml
&& gaiad start --pruning=nothing"
15 changes: 15 additions & 0 deletions two-chains/paths/transfer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"src": {
"chain-id": "ibc-0",
"port-id": "transfer",
"order": "unordered",
"version": "ics20-1"
},
"dst": {
"chain-id": "ibc-1",
"port-id": "transfer",
"order": "unordered",
"version": "ics20-1"
},
"strategy": { "type": "naive" }
}
Loading

0 comments on commit 56c88b6

Please sign in to comment.