feat(cmd): implement offline pruning of state trie (#1564)

ChainSafe · May 20, 2021 · af9c925 · af9c925
1 parent 935bc59
commit af9c925
Show file tree

Hide file tree

Showing 18 changed files with 557 additions and 32 deletions.
diff --git a/cmd/gossamer/flags.go b/cmd/gossamer/flags.go
@@ -264,6 +264,29 @@ var (
 	}
 )
 
+// Flags that configure the prune-state subcommand.
+var (
+	// DBPathFlag names the directory of the output (pruned) DB; it is read by
+	// the prune-state subcommand and has no default value.
+	DBPathFlag = cli.StringFlag{
+		Usage: "Data directory for the output DB",
+		Name:  "pruned-db-path",
+	}
+
+	// BloomFilterSizeFlag is the amount of memory, in megabytes, given to the
+	// bloom filter used by the prune-state subcommand. Defaults to 2048.
+	BloomFilterSizeFlag = cli.IntFlag{
+		Value: 2048,
+		Usage: "Megabytes of memory allocated to bloom-filter for pruning",
+		Name:  "bloom-size",
+	}
+
+	// RetainBlockNumberFlag is the number of blocks, counted back from the
+	// latest block, to retain while pruning with the prune-state subcommand.
+	// Defaults to 256.
+	RetainBlockNumberFlag = cli.IntFlag{
+		Value: 256,
+		Usage: "Retain number of block from latest block while pruning",
+		Name:  "retain-blocks",
+	}
+)
+
 // flag sets that are shared by multiple commands
 var (
 	// GlobalFlags are flags that are valid for use with the root command and all subcommands
@@ -276,6 +299,9 @@ var (
 		CPUProfFlag,
 		MemProfFlag,
 		RewindFlag,
+		DBPathFlag,
+		BloomFilterSizeFlag,
+		RetainBlockNumberFlag,
 	}
 
 	// StartupFlags are flags that are valid for use with the root command and the export subcommand
@@ -354,6 +380,14 @@ var (
 		HeaderFlag,
 		FirstSlotFlag,
 	}
+
+	// PruningFlags are flags that are valid for use with the prune-state subcommand
+	PruningFlags = []cli.Flag{
+		ChainFlag,
+		ConfigFlag,
+		DBPathFlag,
+		BloomFilterSizeFlag,
+		RetainBlockNumberFlag,
+	}
 )
 
 // FixFlagOrder allow us to use various flag order formats (ie, `gossamer init

diff --git a/cmd/gossamer/main.go b/cmd/gossamer/main.go
@@ -22,6 +22,7 @@ import (
 	"os"
 
 	"github.com/ChainSafe/gossamer/dot"
+	"github.com/ChainSafe/gossamer/dot/state"
 	"github.com/ChainSafe/gossamer/lib/keystore"
 	"github.com/ChainSafe/gossamer/lib/utils"
 	log "github.com/ChainSafe/log15"
@@ -35,6 +36,7 @@ const (
 	buildSpecCommandName     = "build-spec"
 	importRuntimeCommandName = "import-runtime"
 	importStateCommandName   = "import-state"
+	pruningStateCommandName  = "prune-state"
 )
 
 // app is the cli application
@@ -115,6 +117,18 @@ var (
 			"Input can be generated by using the RPC function state_getPairs.\n" +
 			"\tUsage: gossamer import-state --state state.json --header header.json --first-slot <first slot of network>\n",
 	}
+
+	// pruningCommand defines the prune-state subcommand. It runs pruneState
+	// and accepts the flags listed in PruningFlags.
+	pruningCommand = cli.Command{
+		Action:    FixFlagOrder(pruneState),
+		Name:      pruningStateCommandName,
+		Usage:     "Prune state will prune the state trie",
+		ArgsUsage: "",
+		Flags:     PruningFlags,
+		// The command takes flags, not positional arguments, so the usage line
+		// spells out the flag names instead of `prune-state <retain-blocks>`.
+		Description: "The prune-state command will prune historical state data.\n" +
+			"All trie nodes that do not belong to the specified version state will be deleted from the database.\n" +
+			"The default pruning target is the HEAD-256 state.\n" +
+			"\tUsage: gossamer prune-state --pruned-db-path <output db path> --bloom-size <megabytes> --retain-blocks <number of blocks>\n",
+	}
 )
 
 // init initialises the cli application
@@ -132,6 +146,7 @@ func init() {
 		buildSpecCommand,
 		importRuntimeCommand,
 		importStateCommand,
+		pruningCommand,
 	}
 	app.Flags = RootFlags
 }
@@ -411,3 +426,39 @@ func buildSpecAction(ctx *cli.Context) error {
 
 	return nil
 }
+
+// pruneState is the action behind the prune-state subcommand.
+//
+// It loads the chain configuration, takes the input DB location from the
+// configuration's global base path and the output DB location from the
+// pruned-db-path flag, then builds a state pruner, fills its bloom filter and
+// runs the pruning.
+//
+// An error is returned when the configuration cannot be loaded, when the
+// output path is empty, when bloom-size or retain-blocks hold unusable values,
+// or when any pruner step fails.
+func pruneState(ctx *cli.Context) error {
+	tomlCfg, _, err := setupConfigFromChain(ctx)
+	if err != nil {
+		logger.Error("failed to load chain configuration", "error", err)
+		return err
+	}
+
+	inputDBPath := tomlCfg.Global.BasePath
+	prunedDBPath := ctx.GlobalString(DBPathFlag.Name)
+	if prunedDBPath == "" {
+		return fmt.Errorf("path not specified for badger db")
+	}
+
+	// GlobalUint64 yields 0 when the flag value cannot be parsed as an unsigned
+	// integer (for example a negative number), so reject 0 explicitly instead
+	// of passing it on silently.
+	bloomSize := ctx.GlobalUint64(BloomFilterSizeFlag.Name)
+	if bloomSize == 0 {
+		return fmt.Errorf("invalid %s: must be greater than zero", BloomFilterSizeFlag.Name)
+	}
+
+	retainBlocks := ctx.GlobalInt64(RetainBlockNumberFlag.Name)
+	if retainBlocks < 0 {
+		return fmt.Errorf("invalid %s: must not be negative", RetainBlockNumberFlag.Name)
+	}
+
+	pruner, err := state.NewPruner(inputDBPath, prunedDBPath, bloomSize, retainBlocks)
+	if err != nil {
+		return err
+	}
+
+	logger.Info("Pruner initialised")
+
+	err = pruner.SetBloomFilter()
+	if err != nil {
+		return fmt.Errorf("failed to set keys into bloom filter: %w", err)
+	}
+
+	err = pruner.Prune()
+	if err != nil {
+		return fmt.Errorf("failed to prune: %w", err)
+	}
+
+	return nil
+}
diff --git a/cmd/gossamer/prune_test.go b/cmd/gossamer/prune_test.go
@@ -0,0 +1,98 @@
+package main
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	"github.com/dgraph-io/badger/v2"
+
+	"github.com/stretchr/testify/require"
+)
+
+// iterateDB calls cb once for every item in db.
+//
+// It walks the DB with a read-only transaction and an iterator created with
+// badger's default iterator options. The iterator is closed and the
+// transaction discarded before the function returns, so nothing is left open
+// on db when the caller later closes it.
+func iterateDB(db *badger.DB, cb func(*badger.Item)) {
+	txn := db.NewTransaction(false)
+	defer txn.Discard()
+
+	itr := txn.NewIterator(badger.DefaultIteratorOptions)
+	// Deferred calls run last-in first-out: the iterator is closed before the
+	// transaction it belongs to is discarded.
+	defer itr.Close()
+
+	for itr.Rewind(); itr.Valid(); itr.Next() {
+		cb(itr.Item())
+	}
+}
+
+// runPruneCmd runs the prune-state command against a test context built from
+// configFile and prunedDBPath, with bloom-size "256" and retain-blocks "5".
+// The test is aborted if the context cannot be created or the command fails.
+func runPruneCmd(t *testing.T, configFile, prunedDBPath string) {
+	flagNames := []string{"config", "pruned-db-path", "bloom-size", "retain-blocks"}
+	flagValues := []interface{}{configFile, prunedDBPath, "256", "5"}
+
+	ctx, err := newTestContext("Test state trie offline pruning  --prune-state", flagNames, flagValues)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	cmd := pruningCommand
+	if err = cmd.Run(ctx); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestPruneState runs the prune-state command on the DB under tests/data/db
+// and checks that every key without the "storage" prefix found in the input
+// DB is also present in the pruned DB, and that both hold the same number of
+// such keys.
+func TestPruneState(t *testing.T) {
+	var (
+		inputDBPath   = "../../tests/data/db"
+		configFile    = "../../tests/data/db/config.toml"
+		prunedDBPath  = fmt.Sprintf("%s/%s", t.TempDir(), "pruned")
+		storagePrefix = "storage"
+	)
+
+	// collectKeys walks db and returns the set of keys that do not start with
+	// storagePrefix together with the number of keys that do.
+	collectKeys := func(db *badger.DB) (map[string]struct{}, int) {
+		nonStorage := make(map[string]struct{})
+		var numStorage int
+
+		iterateDB(db, func(item *badger.Item) {
+			key := string(item.Key())
+			if strings.HasPrefix(key, storagePrefix) {
+				numStorage++
+				return
+			}
+			nonStorage[key] = struct{}{}
+		})
+
+		return nonStorage, numStorage
+	}
+
+	inputDB, err := badger.Open(badger.DefaultOptions(inputDBPath).WithReadOnly(true))
+	require.NoError(t, err)
+
+	nonStorageKeys, numStorageKeys := collectKeys(inputDB)
+
+	// The input DB is closed before the prune command runs.
+	err = inputDB.Close()
+	require.NoError(t, err)
+
+	t.Log("Total keys in input DB", numStorageKeys+len(nonStorageKeys), "storage keys", numStorageKeys)
+
+	t.Log("pruned DB path", prunedDBPath)
+
+	runPruneCmd(t, configFile, prunedDBPath)
+
+	prunedDB, err := badger.Open(badger.DefaultOptions(prunedDBPath))
+	require.NoError(t, err)
+
+	// Close the pruned DB even when an assertion below aborts the test.
+	// Cleanups run last-registered first, so this runs before the directory
+	// created by t.TempDir is removed.
+	t.Cleanup(func() {
+		if closeErr := prunedDB.Close(); closeErr != nil {
+			t.Errorf("failed to close pruned DB: %s", closeErr)
+		}
+	})
+
+	nonStorageKeysPruned, numStorageKeysPruned := collectKeys(prunedDB)
+
+	t.Log("Total keys in pruned DB", len(nonStorageKeysPruned)+numStorageKeysPruned, "storage keys", numStorageKeysPruned)
+
+	// Expected value (input DB) first, actual value (pruned DB) second.
+	require.Equal(t, len(nonStorageKeys), len(nonStorageKeysPruned))
+
+	// Check all non storage keys are present.
+	for k := range nonStorageKeys {
+		require.Contains(t, nonStorageKeysPruned, k)
+	}
+}
diff --git a/cmd/gossamer/utils.go b/cmd/gossamer/utils.go
@@ -28,7 +28,6 @@ import (
 
 	"github.com/ChainSafe/gossamer/dot"
 	"github.com/ChainSafe/gossamer/lib/utils"
-
 	log "github.com/ChainSafe/log15"
 	"github.com/stretchr/testify/require"
 	"github.com/urfave/cli"

diff --git a/dot/network/message_cache_test.go b/dot/network/message_cache_test.go
@@ -42,7 +42,8 @@ func TestMessageCache(t *testing.T) {
 	ok = msgCache.exists(peerID, msg)
 	require.True(t, ok)
 
-	time.Sleep(50 * time.Millisecond)
+	// TODO: Cache has issues with timeout. https://discuss.dgraph.io/t/setwithttl-doesnt-work/14192
+	time.Sleep(3 * time.Second)
 
 	ok = msgCache.exists(peerID, msg)
 	require.False(t, ok)

diff --git a/dot/state/bloom.go b/dot/state/bloom.go
@@ -0,0 +1,59 @@
+package state
+
+import (
+	"encoding/binary"
+	"errors"
+
+	"github.com/ChainSafe/gossamer/lib/common"
+	log "github.com/ChainSafe/log15"
+	bloomfilter "github.com/holiman/bloomfilter/v2"
+)
+
+// ErrKeySize is returned when the length of a key is not equal to common.HashLength
+var ErrKeySize = errors.New("key size must be equal to hash length")
+
+// bloomStateHasher is a key's raw bytes dressed up as a hasher so that the key
+// can be passed to the bloom filter. Only Size and Sum64 are usable; every
+// other method panics.
+type bloomStateHasher []byte
+
+// Write is not supported and always panics.
+func (h bloomStateHasher) Write(p []byte) (n int, err error) {
+	panic("not implemented")
+}
+
+// Sum is not supported and always panics.
+func (h bloomStateHasher) Sum(b []byte) []byte {
+	panic("not implemented")
+}
+
+// Reset is not supported and always panics.
+func (h bloomStateHasher) Reset() {
+	panic("not implemented")
+}
+
+// BlockSize is not supported and always panics.
+func (h bloomStateHasher) BlockSize() int {
+	panic("not implemented")
+}
+
+// Size reports the size of the hash value in bytes, which is always 8.
+func (h bloomStateHasher) Size() int {
+	return 8
+}
+
+// Sum64 returns the first eight bytes of the key read as a big-endian uint64.
+func (h bloomStateHasher) Sum64() uint64 {
+	return binary.BigEndian.Uint64(h)
+}
+
+// bloomState is a wrapper around a bloom filter.
+// The keys of all generated entries are recorded here so that, in the pruning
+// stage, the entries that belong to the specific version can be spared from
+// deletion.
+type bloomState struct {
+	bloom *bloomfilter.Filter
+}
+
+// newBloomState creates a brand new state bloom for state generation
+// The bloom filter will be created by the passing bloom filter size. the parameters
+// are picked so that the false-positive rate for mainnet is low enough.
+func newBloomState(size uint64) (*bloomState, error) {
+	bloom, err := bloomfilter.New(size*1024*1024*8, 4)
+	if err != nil {
+		return nil, err
+	}
+	log.Info("initialised state bloom", "size", float64(bloom.M()/8))
+	return &bloomState{bloom: bloom}, nil
+}
+
+// put records key in the bloom filter.
+// It returns ErrKeySize, without touching the filter, when the key length
+// differs from common.HashLength.
+func (sb *bloomState) put(key []byte) error {
+	if keyLen := len(key); keyLen != common.HashLength {
+		return ErrKeySize
+	}
+
+	hasher := bloomStateHasher(key)
+	sb.bloom.Add(hasher)
+
+	return nil
+}
+
+// contain wraps the underlying filter's Contains function and reports whether
+// the key is contained.
+// - If it says yes, the key may be contained
+// - If it says no, the key is definitely not contained.
+//
+// NOTE(review): unlike put, the key length is not checked here; presumably the
+// filter calls Sum64 on the hasher, which would panic for keys shorter than
+// 8 bytes. Confirm that callers only pass hash-sized keys.
+func (sb *bloomState) contain(key []byte) bool {
+	hasher := bloomStateHasher(key)
+	return sb.bloom.Contains(hasher)
+}