Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(cmd): Add offline pruning of state trie. #1564

Merged
merged 18 commits into from
May 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions cmd/gossamer/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,29 @@ var (
}
)

// State Prune flags
var (
// BloomFilterSizeFlag sets the amount of memory, in megabytes, allocated to the
// bloom filter used while pruning (default 2048).
// Valid for use with the prune-state subcommand.
BloomFilterSizeFlag = cli.IntFlag{
Name: "bloom-size",
Usage: "Megabytes of memory allocated to bloom-filter for pruning",
Value: 2048,
}

// DBPathFlag sets the data directory of the pruned (output) database.
// It has no default value. Valid for use with the prune-state subcommand.
DBPathFlag = cli.StringFlag{
Name: "pruned-db-path",
Usage: "Data directory for the output DB",
}
noot marked this conversation as resolved.
Show resolved Hide resolved

// RetainBlockNumberFlag sets how many blocks, counted back from the latest
// block, are retained while pruning (default 256).
// Valid for use with the prune-state subcommand.
RetainBlockNumberFlag = cli.IntFlag{
Name: "retain-blocks",
Usage: "Retain number of block from latest block while pruning",
Value: 256,
}
)

// flag sets that are shared by multiple commands
var (
// GlobalFlags are flags that are valid for use with the root command and all subcommands
Expand All @@ -276,6 +299,9 @@ var (
CPUProfFlag,
MemProfFlag,
RewindFlag,
DBPathFlag,
BloomFilterSizeFlag,
RetainBlockNumberFlag,
}

// StartupFlags are flags that are valid for use with the root command and the export subcommand
Expand Down Expand Up @@ -354,6 +380,14 @@ var (
HeaderFlag,
FirstSlotFlag,
}

// PruningFlags are the flags accepted by the state pruning command: chain and
// config selection plus the pruned DB path, bloom filter size and number of
// blocks to retain.
PruningFlags = []cli.Flag{
ChainFlag,
ConfigFlag,
DBPathFlag,
BloomFilterSizeFlag,
RetainBlockNumberFlag,
}
)

// FixFlagOrder allow us to use various flag order formats (ie, `gossamer init
Expand Down
51 changes: 51 additions & 0 deletions cmd/gossamer/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"os"

"github.com/ChainSafe/gossamer/dot"
"github.com/ChainSafe/gossamer/dot/state"
"github.com/ChainSafe/gossamer/lib/keystore"
"github.com/ChainSafe/gossamer/lib/utils"
log "github.com/ChainSafe/log15"
Expand All @@ -35,6 +36,7 @@ const (
buildSpecCommandName = "build-spec"
importRuntimeCommandName = "import-runtime"
importStateCommandName = "import-state"
pruningStateCommandName = "prune-state"
)

// app is the cli application
Expand Down Expand Up @@ -115,6 +117,18 @@ var (
"Input can be generated by using the RPC function state_getPairs.\n" +
"\tUsage: gossamer import-state --state state.json --header header.json --first-slot <first slot of network>\n",
}

// pruningCommand defines the state pruning subcommand. It is registered under
// pruningStateCommandName, accepts PruningFlags and runs pruneState (wrapped in
// FixFlagOrder) as its action.
pruningCommand = cli.Command{
Action: FixFlagOrder(pruneState),
Name: pruningStateCommandName,
Usage: "Prune state will prune the state trie",
ArgsUsage: "",
Flags: PruningFlags,
Description: `prune-state <retain-blocks> will prune historical state data.
All trie nodes that do not belong to the specified version state will be deleted from the database.

The default pruning target is the HEAD-256 state`,
}
)

// init initialises the cli application
Expand All @@ -132,6 +146,7 @@ func init() {
buildSpecCommand,
importRuntimeCommand,
importStateCommand,
pruningCommand,
}
app.Flags = RootFlags
}
Expand Down Expand Up @@ -411,3 +426,39 @@ func buildSpecAction(ctx *cli.Context) error {

return nil
}

// pruneState is the action of the state pruning subcommand.
//
// It loads the chain configuration, takes the input database path from the
// configured base path and the output path from DBPathFlag, then builds a
// state pruner, fills its bloom filter and runs the pruning.
//
// It returns an error when the configuration cannot be loaded, when no output
// path was supplied, or when any pruner step fails. Pruner step errors are
// wrapped with %w so callers can still inspect the underlying cause.
func pruneState(ctx *cli.Context) error {
	tomlCfg, _, err := setupConfigFromChain(ctx)
	if err != nil {
		logger.Error("failed to load chain configuration", "error", err)
		return err
	}

	inputDBPath := tomlCfg.Global.BasePath
	prunedDBPath := ctx.GlobalString(DBPathFlag.Name)
	if prunedDBPath == "" {
		// The output path has no default, so it must be given explicitly.
		return fmt.Errorf("path not specified for badger db")
	}

	bloomSize := ctx.GlobalUint64(BloomFilterSizeFlag.Name)
	retainBlocks := ctx.GlobalInt64(RetainBlockNumberFlag.Name)

	pruner, err := state.NewPruner(inputDBPath, prunedDBPath, bloomSize, retainBlocks)
	if err != nil {
		return err
	}

	logger.Info("Pruner initialised")

	// A ": " separator keeps the context and the wrapped cause readable.
	if err = pruner.SetBloomFilter(); err != nil {
		return fmt.Errorf("failed to set keys into bloom filter: %w", err)
	}

	if err = pruner.Prune(); err != nil {
		return fmt.Errorf("failed to prune: %w", err)
	}

	return nil
}
98 changes: 98 additions & 0 deletions cmd/gossamer/prune_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package main

import (
"fmt"
"strings"
"testing"

"github.com/dgraph-io/badger/v2"

"github.com/stretchr/testify/require"
)

// iterateDB walks every item of db inside a read-only transaction and calls cb
// for each one, in iteration order.
//
// The iterator is closed and the transaction discarded before returning, so
// the database can be closed safely afterwards. Items passed to cb must not be
// retained past the callback, since they belong to the iterator.
func iterateDB(db *badger.DB, cb func(*badger.Item)) {
	txn := db.NewTransaction(false)
	defer txn.Discard()

	itr := txn.NewIterator(badger.DefaultIteratorOptions)
	// Deferred calls run in LIFO order: the iterator closes before the
	// transaction is discarded.
	defer itr.Close()

	for itr.Rewind(); itr.Valid(); itr.Next() {
		cb(itr.Item())
	}
}
// runPruneCmd runs the pruning command with the given config file and output
// database path, using a bloom filter size of 256 and retaining 5 blocks.
// The test fails immediately when the context cannot be built or the command
// returns an error.
func runPruneCmd(t *testing.T, configFile, prunedDBPath string) {
	// Mark as helper so failures are reported at the calling test's line.
	t.Helper()

	ctx, err := newTestContext(
		"Test state trie offline pruning --prune-state",
		[]string{"config", "pruned-db-path", "bloom-size", "retain-blocks"},
		[]interface{}{configFile, prunedDBPath, "256", "5"},
	)
	if err != nil {
		t.Fatal(err)
	}

	command := pruningCommand
	if err = command.Run(ctx); err != nil {
		t.Fatal(err)
	}
}

// TestPruneState runs the pruning command against the fixture database and
// checks that the pruned database holds exactly the same non-storage keys as
// the input database. Storage keys (those prefixed with "storage") are only
// counted and logged.
func TestPruneState(t *testing.T) {
	var (
		inputDBPath   = "../../tests/data/db"
		configFile    = "../../tests/data/db/config.toml"
		prunedDBPath  = fmt.Sprintf("%s/%s", t.TempDir(), "pruned")
		storagePrefix = "storage"
	)

	// collectKeys returns the set of non-storage keys in db together with the
	// number of storage keys it contains.
	collectKeys := func(db *badger.DB) (map[string]struct{}, int) {
		nonStorage := make(map[string]struct{})
		var numStorage int

		iterateDB(db, func(item *badger.Item) {
			key := string(item.Key())
			if strings.HasPrefix(key, storagePrefix) {
				numStorage++
				return
			}
			nonStorage[key] = struct{}{}
		})

		return nonStorage, numStorage
	}

	inputDB, err := badger.Open(badger.DefaultOptions(inputDBPath).WithReadOnly(true))
	require.NoError(t, err)

	nonStorageKeys, numStorageKeys := collectKeys(inputDB)

	err = inputDB.Close()
	require.NoError(t, err)

	t.Log("Total keys in input DB", numStorageKeys+len(nonStorageKeys), "storage keys", numStorageKeys)

	t.Log("pruned DB path", prunedDBPath)

	runPruneCmd(t, configFile, prunedDBPath)

	prunedDB, err := badger.Open(badger.DefaultOptions(prunedDBPath))
	require.NoError(t, err)

	nonStorageKeysPruned, numStorageKeysPruned := collectKeys(prunedDB)

	t.Log("Total keys in pruned DB", len(nonStorageKeysPruned)+numStorageKeysPruned, "storage keys", numStorageKeysPruned)

	// Expected value (input DB) goes first so failure output is labelled correctly.
	require.Equal(t, len(nonStorageKeys), len(nonStorageKeysPruned))

	// Check all non storage keys are present.
	for k := range nonStorageKeys {
		_, ok := nonStorageKeysPruned[k]
		require.True(t, ok, "non-storage key %q missing from pruned DB", k)
	}

	err = prunedDB.Close()
	require.NoError(t, err)
}
1 change: 0 additions & 1 deletion cmd/gossamer/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (

"github.com/ChainSafe/gossamer/dot"
"github.com/ChainSafe/gossamer/lib/utils"

log "github.com/ChainSafe/log15"
"github.com/stretchr/testify/require"
"github.com/urfave/cli"
Expand Down
3 changes: 2 additions & 1 deletion dot/network/message_cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ func TestMessageCache(t *testing.T) {
ok = msgCache.exists(peerID, msg)
require.True(t, ok)

time.Sleep(50 * time.Millisecond)
// TODO: Cache has issues with timeout. https://discuss.dgraph.io/t/setwithttl-doesnt-work/14192
time.Sleep(3 * time.Second)

ok = msgCache.exists(peerID, msg)
require.False(t, ok)
Expand Down
59 changes: 59 additions & 0 deletions dot/state/bloom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package state

import (
"encoding/binary"
"errors"

"github.com/ChainSafe/gossamer/lib/common"
log "github.com/ChainSafe/log15"
bloomfilter "github.com/holiman/bloomfilter/v2"
)

// ErrKeySize is returned when a key does not have the size required by the
// bloom filter.
var ErrKeySize = errors.New("invalid key size")

// bloomStateHasher lets a raw key act as its own 64-bit hash. It carries the
// method set of a 64-bit hasher, but only Size and Sum64 are usable; every
// other method panics.
type bloomStateHasher []byte

// Write is not supported and always panics.
func (h bloomStateHasher) Write(p []byte) (n int, err error) {
	panic("not implemented")
}

// Sum is not supported and always panics.
func (h bloomStateHasher) Sum(b []byte) []byte {
	panic("not implemented")
}

// Reset is not supported and always panics.
func (h bloomStateHasher) Reset() {
	panic("not implemented")
}

// BlockSize is not supported and always panics.
func (h bloomStateHasher) BlockSize() int {
	panic("not implemented")
}

// Size reports the number of bytes consumed by Sum64.
func (h bloomStateHasher) Size() int {
	const sum64Bytes = 8
	return sum64Bytes
}

// Sum64 decodes the first 8 bytes of the key as a big-endian uint64.
// It panics when the key is shorter than 8 bytes.
func (h bloomStateHasher) Sum64() uint64 {
	return binary.BigEndian.Uint64(h)
}

// bloomState is a wrapper around a bloom filter.
// The keys of all generated entries are recorded here so that, in the pruning
// stage, entries belonging to the specific version can be spared from deletion.
type bloomState struct {
// bloom is the underlying filter that put adds keys to and contain queries.
bloom *bloomfilter.Filter
}

// newBloomState allocates a fresh bloomState for state generation.
//
// size is multiplied by 1024*1024*8 to obtain the number of bits requested
// from the filter, i.e. it is interpreted as a size in megabytes. The filter
// uses 4 hash functions. Any error from the filter constructor is returned
// unchanged.
func newBloomState(size uint64) (*bloomState, error) {
	const (
		bitsPerMegabyte = 1024 * 1024 * 8
		hashCount       = 4
	)

	filter, err := bloomfilter.New(size*bitsPerMegabyte, hashCount)
	if err != nil {
		return nil, err
	}

	// M() is the filter size in bits; the logged value is that size in bytes.
	log.Info("initialised state bloom", "size", float64(filter.M()/8))

	state := &bloomState{bloom: filter}
	return state, nil
}

// put records key in the bloom filter.
// It returns ErrKeySize, without touching the filter, when key is not exactly
// common.HashLength bytes long.
func (sb *bloomState) put(key []byte) error {
	if keyLen := len(key); keyLen != common.HashLength {
		return ErrKeySize
	}

	hasher := bloomStateHasher(key)
	sb.bloom.Add(hasher)

	return nil
}

// contain wraps the underlying filter's Contains and reports whether key may
// have been recorded.
//   - If it says yes, the key may be contained.
//   - If it says no, the key is definitely not contained.
//
// Keys shorter than 8 bytes are reported as not contained instead of being
// hashed, because bloomStateHasher.Sum64 reads 8 bytes and would panic on them.
// Such keys cannot have been added when put's required length
// (common.HashLength) is at least 8 bytes (presumably it is; verify against
// the common package).
func (sb *bloomState) contain(key []byte) bool {
	// minKeyLen mirrors bloomStateHasher.Size: the bytes consumed by Sum64.
	const minKeyLen = 8
	if len(key) < minKeyLen {
		return false
	}

	return sb.bloom.Contains(bloomStateHasher(key))
}
Loading