-
Notifications
You must be signed in to change notification settings - Fork 129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(lib/trie): Parallel hash trie. #1657
Changes from 6 commits
0f79411
09d1f51
5c8de78
0c47f02
f192aaa
12466e5
08fba2e
69a268f
392c1e5
5df00f1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -17,31 +17,62 @@ | |||||
package trie | ||||||
|
||||||
import ( | ||||||
"bytes" | ||||||
"context" | ||||||
"hash" | ||||||
"sync" | ||||||
|
||||||
"github.com/ChainSafe/gossamer/lib/common" | ||||||
"github.com/ChainSafe/gossamer/lib/scale" | ||||||
"golang.org/x/crypto/blake2b" | ||||||
"golang.org/x/sync/errgroup" | ||||||
) | ||||||
|
||||||
// sliceBuffer is a minimal append-only scratch buffer backed by a byte slice.
type sliceBuffer []byte

// write appends data to the end of the buffer, growing it as needed.
func (b *sliceBuffer) write(data []byte) {
	buf := *b
	buf = append(buf, data...)
	*b = buf
}

// reset truncates the buffer to zero length while keeping its capacity,
// so the backing array can be reused by subsequent writes.
func (b *sliceBuffer) reset() {
	buf := *b
	*b = buf[:0]
}
|
||||||
// Hasher is a wrapper around a hash function
type Hasher struct {
	hash     hash.Hash   // underlying hash state; populated with blake2b-256 by hasherPool.New
	tmp      sliceBuffer // scratch buffer reused across encode calls; reset before each use
	parallel bool        // Whether to use parallel threads when hashing
}
|
||||||
// hasherPool creates a pool of Hasher.
// Pooling avoids re-allocating the blake2b state and the scratch buffer on
// every trie hash; Hashers are handed out by NewHasher and recycled by
// returnToPool.
var hasherPool = sync.Pool{
	New: func() interface{} {
		// blake2b.New256 only fails for an invalid key; a nil key never
		// errors, so the error is intentionally discarded here.
		h, _ := blake2b.New256(nil)

		return &Hasher{
			// cap is as large as a full branch node.
			// NOTE(review): 520 assumes a value-less branch; a branch that
			// also stores a value can encode larger. append will grow the
			// buffer past the cap, so this is a hint, not a limit — confirm
			// the intended upper bound.
			tmp:  make(sliceBuffer, 0, 520),
			hash: h,
		}
	},
}
|
||||||
// NewHasher create new Hasher instance
// It draws a recycled Hasher from hasherPool rather than allocating, and
// records whether child hashing should run in parallel. The caller is
// expected to hand the Hasher back via returnToPool when finished.
func NewHasher(parallel bool) *Hasher {
	h := hasherPool.Get().(*Hasher)
	h.parallel = parallel
	return h
}
func (h *Hasher) returnToPool() { | ||||||
h.tmp.reset() | ||||||
h.hash.Reset() | ||||||
hasherPool.Put(h) | ||||||
} | ||||||
|
||||||
// Hash encodes the node and then hashes it if its encoded length is > 32 bytes | ||||||
func (h *Hasher) Hash(n node) (res []byte, err error) { | ||||||
encNode, err := n.encode() | ||||||
encNode, err := h.encode(n) | ||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
|
@@ -51,6 +82,7 @@ func (h *Hasher) Hash(n node) (res []byte, err error) { | |||||
return encNode, nil | ||||||
} | ||||||
|
||||||
h.hash.Reset() | ||||||
// otherwise, hash encoded node | ||||||
_, err = h.hash.Write(encNode) | ||||||
if err == nil { | ||||||
|
@@ -59,3 +91,134 @@ func (h *Hasher) Hash(n node) (res []byte, err error) { | |||||
|
||||||
return res, err | ||||||
} | ||||||
|
||||||
// encode is the high-level function wrapping the encoding for different node types | ||||||
// encoding has the following format: | ||||||
// NodeHeader | Extra partial key length | Partial Key | Value | ||||||
func (h *Hasher) encode(n node) ([]byte, error) { | ||||||
switch n := n.(type) { | ||||||
case *branch: | ||||||
return h.encodeBranch(n) | ||||||
case *leaf: | ||||||
return h.encodeLeaf(n) | ||||||
case nil: | ||||||
return []byte{0}, nil | ||||||
} | ||||||
|
||||||
return nil, nil | ||||||
} | ||||||
|
||||||
func encodeAndHash(n node) ([]byte, error) { | ||||||
h := NewHasher(false) | ||||||
defer h.returnToPool() | ||||||
|
||||||
encChild, err := h.Hash(n) | ||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
|
||||||
scEncChild, err := scale.Encode(encChild) | ||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
return scEncChild, nil | ||||||
} | ||||||
|
||||||
// encodeBranch encodes a branch with the encoding specified at the top of this package | ||||||
func (h *Hasher) encodeBranch(b *branch) ([]byte, error) { | ||||||
if !b.dirty && b.encoding != nil { | ||||||
return b.encoding, nil | ||||||
} | ||||||
h.tmp.reset() | ||||||
|
||||||
encoding, err := b.header() | ||||||
h.tmp.write(encoding) | ||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
|
||||||
h.tmp.write(nibblesToKeyLE(b.key)) | ||||||
h.tmp.write(common.Uint16ToBytes(b.childrenBitmap())) | ||||||
|
||||||
if b.value != nil { | ||||||
buffer := bytes.Buffer{} | ||||||
se := scale.Encoder{Writer: &buffer} | ||||||
_, err = se.Encode(b.value) | ||||||
if err != nil { | ||||||
return h.tmp, err | ||||||
} | ||||||
h.tmp.write(buffer.Bytes()) | ||||||
} | ||||||
|
||||||
if h.parallel { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit:
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||
wg, _ := errgroup.WithContext(context.Background()) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TIL |
||||||
resBuff := make([][]byte, 16) | ||||||
for i := 0; i < 16; i++ { | ||||||
func(i int) { | ||||||
wg.Go(func() error { | ||||||
child := b.children[i] | ||||||
if child == nil { | ||||||
return nil | ||||||
} | ||||||
|
||||||
var err error | ||||||
resBuff[i], err = encodeAndHash(child) | ||||||
if err != nil { | ||||||
return err | ||||||
} | ||||||
return nil | ||||||
}) | ||||||
}(i) | ||||||
} | ||||||
if err := wg.Wait(); err != nil { | ||||||
return nil, err | ||||||
} | ||||||
|
||||||
for _, v := range resBuff { | ||||||
if v != nil { | ||||||
h.tmp.write(v) | ||||||
} | ||||||
} | ||||||
} else { | ||||||
for i := 0; i < 16; i++ { | ||||||
if child := b.children[i]; child != nil { | ||||||
scEncChild, err := encodeAndHash(child) | ||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
h.tmp.write(scEncChild) | ||||||
} | ||||||
} | ||||||
} | ||||||
|
||||||
return h.tmp, nil | ||||||
} | ||||||
|
||||||
// encodeLeaf encodes a leaf with the encoding specified at the top of this package | ||||||
func (h *Hasher) encodeLeaf(l *leaf) ([]byte, error) { | ||||||
if !l.dirty && l.encoding != nil { | ||||||
return l.encoding, nil | ||||||
} | ||||||
|
||||||
h.tmp.reset() | ||||||
|
||||||
encoding, err := l.header() | ||||||
h.tmp.write(encoding) | ||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
|
||||||
h.tmp.write(nibblesToKeyLE(l.key)) | ||||||
|
||||||
buffer := bytes.Buffer{} | ||||||
se := scale.Encoder{Writer: &buffer} | ||||||
|
||||||
_, err = se.Encode(l.value) | ||||||
if err != nil { | ||||||
return nil, err | ||||||
} | ||||||
|
||||||
h.tmp.write(buffer.Bytes()) | ||||||
l.encoding = h.tmp | ||||||
return h.tmp, nil | ||||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
wondering why not use
bytes.Buffer
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because we are calculating the upper limit of the slice and preallocating it. Thus, avoiding dynamic allocation.