Skip to content

Commit

Permalink
feat(cmd): implement offline pruning of state trie (#1564)
Browse files Browse the repository at this point in the history
  • Loading branch information
arijitAD authored May 20, 2021
1 parent 935bc59 commit af9c925
Show file tree
Hide file tree
Showing 18 changed files with 557 additions and 32 deletions.
34 changes: 34 additions & 0 deletions cmd/gossamer/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,29 @@ var (
}
)

// State pruning flags, used by the prune-state subcommand.
var (
// BloomFilterSizeFlag sets how much memory, in megabytes, is allocated to the
// bloom filter used while pruning. It defaults to 2048 and is valid for use
// with the prune-state subcommand.
BloomFilterSizeFlag = cli.IntFlag{
Name: "bloom-size",
Usage: "Megabytes of memory allocated to bloom-filter for pruning",
Value: 2048,
}

// DBPathFlag sets the data directory of the output (pruned) DB. It has no
// default value and is valid for use with the prune-state subcommand.
DBPathFlag = cli.StringFlag{
Name: "pruned-db-path",
Usage: "Data directory for the output DB",
}

// RetainBlockNumberFlag sets how many blocks, counted back from the latest
// block, are retained while pruning. It defaults to 256 and is valid for use
// with the prune-state subcommand.
RetainBlockNumberFlag = cli.IntFlag{
Name: "retain-blocks",
Usage: "Retain number of block from latest block while pruning",
Value: 256,
}
)

// flag sets that are shared by multiple commands
var (
// GlobalFlags are flags that are valid for use with the root command and all subcommands
Expand All @@ -276,6 +299,9 @@ var (
CPUProfFlag,
MemProfFlag,
RewindFlag,
DBPathFlag,
BloomFilterSizeFlag,
RetainBlockNumberFlag,
}

// StartupFlags are flags that are valid for use with the root command and the export subcommand
Expand Down Expand Up @@ -354,6 +380,14 @@ var (
HeaderFlag,
FirstSlotFlag,
}

// PruningFlags are flags that are valid for use with the prune-state subcommand
PruningFlags = []cli.Flag{
ChainFlag,
ConfigFlag,
DBPathFlag,
BloomFilterSizeFlag,
RetainBlockNumberFlag,
}
)

// FixFlagOrder allow us to use various flag order formats (ie, `gossamer init
Expand Down
51 changes: 51 additions & 0 deletions cmd/gossamer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"os"

"github.com/ChainSafe/gossamer/dot"
"github.com/ChainSafe/gossamer/dot/state"
"github.com/ChainSafe/gossamer/lib/keystore"
"github.com/ChainSafe/gossamer/lib/utils"
log "github.com/ChainSafe/log15"
Expand All @@ -35,6 +36,7 @@ const (
buildSpecCommandName = "build-spec"
importRuntimeCommandName = "import-runtime"
importStateCommandName = "import-state"
pruningStateCommandName = "prune-state"
)

// app is the cli application
Expand Down Expand Up @@ -115,6 +117,18 @@ var (
"Input can be generated by using the RPC function state_getPairs.\n" +
"\tUsage: gossamer import-state --state state.json --header header.json --first-slot <first slot of network>\n",
}

// pruningCommand defines the prune-state subcommand, which runs pruneState
// with the flags listed in PruningFlags.
//
// The command takes no positional arguments (ArgsUsage is empty): the number
// of blocks to retain is supplied through the --retain-blocks flag, so the
// description documents flag usage rather than a positional argument.
pruningCommand = cli.Command{
	Action:    FixFlagOrder(pruneState),
	Name:      pruningStateCommandName,
	Usage:     "Prune state will prune the state trie",
	ArgsUsage: "",
	Flags:     PruningFlags,
	Description: `prune-state will prune historical state data.
All trie nodes that do not belong to the specified version state will be deleted from the database.
The default pruning target is the HEAD-256 state.
	Usage: gossamer prune-state --config <config file> --pruned-db-path <output db directory> --retain-blocks <number of blocks>`,
}
)

// init initialises the cli application
Expand All @@ -132,6 +146,7 @@ func init() {
buildSpecCommand,
importRuntimeCommand,
importStateCommand,
pruningCommand,
}
app.Flags = RootFlags
}
Expand Down Expand Up @@ -411,3 +426,39 @@ func buildSpecAction(ctx *cli.Context) error {

return nil
}

// pruneState is the action behind the prune-state subcommand.
//
// It loads the chain configuration, takes the input DB location from the
// configured base path and the output DB location from the pruned-db-path
// flag, builds a state pruner with the requested bloom filter size and number
// of blocks to retain, fills the bloom filter and finally runs the prune.
//
// It returns an error when the configuration cannot be loaded, when no output
// path was given, or when any pruner step fails.
func pruneState(ctx *cli.Context) error {
	cfg, _, err := setupConfigFromChain(ctx)
	if err != nil {
		logger.Error("failed to load chain configuration", "error", err)
		return err
	}

	srcPath := cfg.Global.BasePath

	// The output location has no default, so it must be supplied explicitly.
	dstPath := ctx.GlobalString(DBPathFlag.Name)
	if dstPath == "" {
		return fmt.Errorf("path not specified for badger db")
	}

	pruner, err := state.NewPruner(
		srcPath,
		dstPath,
		ctx.GlobalUint64(BloomFilterSizeFlag.Name),
		ctx.GlobalInt64(RetainBlockNumberFlag.Name),
	)
	if err != nil {
		return err
	}

	logger.Info("Pruner initialised")

	if err = pruner.SetBloomFilter(); err != nil {
		return fmt.Errorf("failed to set keys into bloom filter %w", err)
	}

	if err = pruner.Prune(); err != nil {
		return fmt.Errorf("failed to prune %w", err)
	}

	return nil
}
98 changes: 98 additions & 0 deletions cmd/gossamer/prune_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package main

import (
"fmt"
"strings"
"testing"

"github.com/dgraph-io/badger/v2"

"github.com/stretchr/testify/require"
)

// iterateDB walks every item of db inside a read-only transaction and calls
// cb once per item.
//
// The item passed to cb comes straight from the iterator, so cb should copy
// anything it wants to keep rather than hold on to the item itself.
//
// The iterator and the transaction are both released before returning, so
// the caller can close db afterwards without leaking either of them.
func iterateDB(db *badger.DB, cb func(*badger.Item)) {
	txn := db.NewTransaction(false)
	// Deferred calls run last-in-first-out: the iterator is closed first and
	// only then is the transaction discarded.
	defer txn.Discard()

	itr := txn.NewIterator(badger.DefaultIteratorOptions)
	defer itr.Close()

	for itr.Rewind(); itr.Valid(); itr.Next() {
		cb(itr.Item())
	}
}
// runPruneCmd runs the prune-state command against the given config file and
// writes the pruned database to prunedDBPath, using a bloom-size of 256 and
// retaining 5 blocks. Any error fails the calling test immediately.
func runPruneCmd(t *testing.T, configFile, prunedDBPath string) {
	// Mark as a helper so failures are attributed to the calling test's line.
	t.Helper()

	ctx, err := newTestContext(
		"Test state trie offline pruning --prune-state",
		[]string{"config", "pruned-db-path", "bloom-size", "retain-blocks"},
		[]interface{}{configFile, prunedDBPath, "256", "5"},
	)
	if err != nil {
		t.Fatal(err)
	}

	command := pruningCommand
	if err = command.Run(ctx); err != nil {
		t.Fatal(err)
	}
}

// TestPruneState runs the prune-state command against the test database and
// checks that every key outside the storage prefix is still present in the
// pruned database.
func TestPruneState(t *testing.T) {
	var (
		inputDBPath   = "../../tests/data/db"
		configFile    = "../../tests/data/db/config.toml"
		prunedDBPath  = fmt.Sprintf("%s/%s", t.TempDir(), "pruned")
		storagePrefix = "storage"
	)

	// collectKeys returns the set of keys that do not start with
	// storagePrefix, together with the number of keys that do.
	collectKeys := func(db *badger.DB) (map[string]struct{}, int) {
		nonStorage := make(map[string]struct{})
		var numStorage int

		iterateDB(db, func(item *badger.Item) {
			// string() copies the key, so it stays valid after iteration moves on.
			key := string(item.Key())
			if strings.HasPrefix(key, storagePrefix) {
				numStorage++
				return
			}
			nonStorage[key] = struct{}{}
		})

		return nonStorage, numStorage
	}

	inputDB, err := badger.Open(badger.DefaultOptions(inputDBPath).WithReadOnly(true))
	require.NoError(t, err)

	nonStorageKeys, numStorageKeys := collectKeys(inputDB)

	// The input DB is closed before the prune command runs.
	// NOTE(review): presumably the command opens the same directory — confirm.
	err = inputDB.Close()
	require.NoError(t, err)

	t.Log("Total keys in input DB", numStorageKeys+len(nonStorageKeys), "storage keys", numStorageKeys)

	t.Log("pruned DB path", prunedDBPath)

	runPruneCmd(t, configFile, prunedDBPath)

	prunedDB, err := badger.Open(badger.DefaultOptions(prunedDBPath))
	require.NoError(t, err)

	nonStorageKeysPruned, numStorageKeysPruned := collectKeys(prunedDB)

	t.Log("Total keys in pruned DB", len(nonStorageKeysPruned)+numStorageKeysPruned, "storage keys", numStorageKeysPruned)

	// Expected value (input DB) goes first, actual value (pruned DB) second,
	// so a failure message labels the two counts correctly.
	require.Equal(t, len(nonStorageKeys), len(nonStorageKeysPruned))

	// Check all non storage keys are present.
	for k := range nonStorageKeys {
		_, ok := nonStorageKeysPruned[k]
		require.True(t, ok, "key %x missing from pruned DB", k)
	}

	err = prunedDB.Close()
	require.NoError(t, err)
}
1 change: 0 additions & 1 deletion cmd/gossamer/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (

"github.com/ChainSafe/gossamer/dot"
"github.com/ChainSafe/gossamer/lib/utils"

log "github.com/ChainSafe/log15"
"github.com/stretchr/testify/require"
"github.com/urfave/cli"
Expand Down
3 changes: 2 additions & 1 deletion dot/network/message_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ func TestMessageCache(t *testing.T) {
ok = msgCache.exists(peerID, msg)
require.True(t, ok)

time.Sleep(50 * time.Millisecond)
// TODO: Cache has issues with timeout. https://discuss.dgraph.io/t/setwithttl-doesnt-work/14192
time.Sleep(3 * time.Second)

ok = msgCache.exists(peerID, msg)
require.False(t, ok)
Expand Down
59 changes: 59 additions & 0 deletions dot/state/bloom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package state

import (
"encoding/binary"
"errors"

"github.com/ChainSafe/gossamer/lib/common"
log "github.com/ChainSafe/log15"
bloomfilter "github.com/holiman/bloomfilter/v2"
)

// ErrKeySize is returned when the size of a key does not match the size
// expected by the bloom filter.
var ErrKeySize = errors.New("invalid key size")

// bloomStateHasher wraps a raw key so that it exposes the method set of
// hash.Hash64. Only Size and Sum64 are functional; every other method panics.
type bloomStateHasher []byte

// Write is not supported and always panics.
func (h bloomStateHasher) Write([]byte) (int, error) {
	panic("not implemented")
}

// Sum is not supported and always panics.
func (h bloomStateHasher) Sum([]byte) []byte {
	panic("not implemented")
}

// Reset is not supported and always panics.
func (h bloomStateHasher) Reset() {
	panic("not implemented")
}

// BlockSize is not supported and always panics.
func (h bloomStateHasher) BlockSize() int {
	panic("not implemented")
}

// Size reports the number of bytes consumed by Sum64.
func (h bloomStateHasher) Size() int {
	const sum64Bytes = 8
	return sum64Bytes
}

// Sum64 interprets the first 8 bytes of the key as a big-endian uint64.
// It panics when the key is shorter than 8 bytes.
func (h bloomStateHasher) Sum64() uint64 {
	return binary.BigEndian.Uint64([]byte(h))
}

// bloomState is a wrapper around a bloom filter.
// The keys of all generated entries are recorded here so that, in the pruning
// stage, entries belonging to the specific version can be spared from deletion.
type bloomState struct {
// bloom is the underlying filter that put and contain operate on.
bloom *bloomfilter.Filter
}

// newBloomState creates a brand new bloomState for state generation.
//
// The filter is created with size*1024*1024*8 bits and 4 hash functions, and
// its size in bytes is logged once it has been built. Any error from creating
// the filter is returned as is.
//
// NOTE(review): the scaling factor suggests size is expressed in megabytes —
// confirm against the caller.
func newBloomState(size uint64) (*bloomState, error) {
	numBits := size * 1024 * 1024 * 8

	filter, err := bloomfilter.New(numBits, 4)
	if err != nil {
		return nil, err
	}

	// M() is the filter size in bits; dividing by 8 reports it in bytes.
	log.Info("initialised state bloom", "size", float64(filter.M()/8))

	return &bloomState{bloom: filter}, nil
}

// put records key in the bloom filter.
// It returns ErrKeySize, without touching the filter, when the key is not
// exactly common.HashLength bytes long.
func (sb *bloomState) put(key []byte) error {
	if len(key) == common.HashLength {
		sb.bloom.Add(bloomStateHasher(key))
		return nil
	}

	return ErrKeySize
}

// contain is the wrapper of the underlying contains function which
// reports whether the key is contained.
// - If it says yes, the key may be contained
// - If it says no, the key is definitely not contained.
//
// Keys shorter than the 8 bytes consumed by bloomStateHasher.Sum64 are
// reported as not contained instead of panicking.
// NOTE(review): this assumes such keys can never have been stored, i.e. that
// common.HashLength (the only length put accepts) is at least 8 — confirm.
func (sb *bloomState) contain(key []byte) bool {
	// Sum64 reads the first 8 bytes of the key and panics on anything shorter.
	if len(key) < bloomStateHasher(nil).Size() {
		return false
	}

	return sb.bloom.Contains(bloomStateHasher(key))
}
Loading

0 comments on commit af9c925

Please sign in to comment.