
feat: fr32-sha2-256-trunc254-padded-binary-tree #19

Merged
merged 10 commits into from
Jul 19, 2023
trim duplicate logic
Gozala committed Jul 19, 2023
commit 14a27ff076e8c7a058118eecf728e1121285af8f
189 changes: 123 additions & 66 deletions src/multihash.js
@@ -37,6 +37,8 @@ export const MAX_HEIGHT = 255
export const MAX_PAYLOAD_SIZE = 2 ** 255 * FR_RATIO

/**
* Computes the digest of the given payload.
*
* @param {Uint8Array} payload
* @returns {StreamDigest}
*/
@@ -47,6 +49,9 @@ export const digest = (payload) => {
}

/**
* Creates a streaming hasher that can be used to consume larger streams
* of data than would be practical to load into memory all at once.
*
* @returns {API.StreamingHasher<typeof code, typeof size, StreamDigest>}
*/
export const create = () => new Hasher()
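
For context, a minimal usage sketch of the streaming API; the import specifier is a placeholder, while `create`, `write`, and `digest` come from the code in this diff:

```js
import { create } from './multihash.js' // hypothetical path

const hasher = create()
// Feed chunks of arbitrary size; the hasher buffers partial quads internally.
hasher.write(new Uint8Array(65_536).fill(1))
hasher.write(new Uint8Array(1_000).fill(2))

// Multihash digest of everything written so far.
const digest = hasher.digest()
```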
@@ -59,7 +64,7 @@ export const create = () => new Hasher()
class Hasher {
constructor() {
/**
* The number of bytes written into the hasher.
* The number of bytes consumed by the hasher.
*
* @private
*/
@@ -86,20 +91,33 @@ class Hasher {
this.offset = 0

/**
* The layers of the tree. Each layer will contain the
* The layers of the tree. Each layer will contain either zero or one node
* between writes. When we write into the hasher, once we have enough bytes,
* leaves are created and pushed into the `layers[0]` array, after which we
* flush, combining every two leaves into a node that is moved to the next
* layer. This process is repeated until we reach the top layer, leaving
* each layer either empty or with a single node.
*
* @type {Layers}
*/
this.layers = [[]]
}

/**
* Returns the total number of bytes written into the hasher. Calling
* {@link reset} will reset the hasher and set the count back to 0.
*
* @returns {bigint}
*/
count() {
return this.bytesWritten
}

/**
* Digest collapses the internal hash state and returns the resulting raw 32
* bytes of commP
* Computes the digest of all the data that has been written into this hasher.
* This method has no side effects, meaning that you can continue writing and
* call this method again to compute the digest of all the data written from
* the very beginning.
*/
digest() {
const buffer = new Uint8Array(MULTIHASH_SIZE)
@@ -108,26 +126,43 @@ class Hasher {
}
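
A short sketch of the side-effect-free behavior described above; the 127-byte chunk size assumes the usual fr32 quad constants and is illustrative only:

```js
const hasher = create()
hasher.write(new Uint8Array(127)) // assumed to be exactly one quad
const first = hasher.digest()     // digest of the first 127 bytes

// Computing a digest does not collapse state, so we can keep writing.
hasher.write(new Uint8Array(127))
const second = hasher.digest()    // digest of all 254 bytes from the start
```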

/**
* Computes the digest and writes it into the given buffer. You can provide
* an optional `byteOffset` to write the digest at that offset in the buffer.
* By default the multihash prefix is written into the buffer, but you can
* opt out by passing `false` as the `asMultihash` argument.
*
* @param {Uint8Array} output
* @param {number} [byteOffset]
* @param {boolean} [asMultihash]
*/
digestInto(output, byteOffset = 0, asMultihash = true) {
const { buffer, layers, offset } = this
// If we have remaining bytes in the buffer we pad with zeros and turn
// them into leaf nodes. Note that it is safe to mutate the buffer here
// as bytes past `offset` are considered dirty.
const nodes = offset > 0 ? split(pad(buffer.fill(0, offset))) : undefined
const { root, height } = computedRoot(layers, nodes)

// We do not want to mutate the layers, so we create a shallow copy of it
// which we will use to compute the root.
let [leaves, ...nodes] = layers

// If we have some bytes in the buffer we fill the rest with zeros and
// compute leaves from them. Note that it is safe to mutate the buffer here
// as bytes past `offset` are considered dirty and should not be read.
if (offset > 0) {
leaves = [...leaves, ...split(pad(buffer.fill(0, offset)))]
}

const tree = build([leaves, ...nodes])
const height = tree.length - 1
const [root] = tree[height]

// Write the multihash prefix if requested
if (asMultihash) {
output.set(PREFIX, byteOffset)
byteOffset += PREFIX.length
}

// Write the tree height as the first byte of the digest
output[byteOffset] = height
byteOffset += 1
// Write the root as the remaining 32 bytes of the digest
output.set(root, byteOffset)

return this
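
A hedged sketch of the output layout `digestInto` produces, based on the writes visible above (the `PREFIX` bytes, one height byte, a 32-byte root); the assumption that `MULTIHASH_SIZE` covers all three is not confirmed by this diff:

```js
const output = new Uint8Array(MULTIHASH_SIZE)
hasher.digestInto(output)
// output = [ ...PREFIX, height, ...root ] — prefix, 1 byte, 32 bytes

// Passing `false` skips the multihash prefix: 1 height byte + 32-byte root.
const raw = new Uint8Array(33)
hasher.digestInto(raw, 0, false)
```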
@@ -140,6 +175,7 @@ class Hasher {
const { buffer, offset, layers } = this
const leaves = layers[0]
const { length } = bytes
// If we got no bytes there is nothing to do here
if (length === 0) {
return this
/* c8 ignore next 5 */
@@ -148,66 +184,59 @@ class Hasher {
`Writing ${length} bytes exceeds max payload size of ${MAX_PAYLOAD_SIZE}`
)
}
// If we do not have enough bytes to fill a quad, just add them to the
// buffer
// If we do not have enough bytes to form a quad, just append the new bytes
// to the buffer and return.
else if (offset + length < buffer.length) {
buffer.set(bytes, offset)
this.offset += length
this.bytesWritten += BigInt(length)
return this
}
// If we are here we have more or equal number of bytes to fill the buffer
// in which case we fill it and process the rest.
// Otherwise we first fill the buffer to form a quad and create some leaves.
// Then we slice the remaining bytes into quad-sized chunks and create leaves
// from them. If some bytes are left over we copy them into the buffer and
// flush, combining node pairs and propagating them up the tree.
else {
// Number of bytes required to fill the buffer
// Number of bytes required to fill the quad buffer
const bytesRequired = buffer.length - offset
// Fill the remainder of the buffer from the given bytes and then
// create leaf from it
// copy the required bytes into the buffer and turn them into leaves
// which we push into the leaf layer.
buffer.set(bytes.subarray(0, bytesRequired), offset)
leaves.push(...split(pad(buffer)))

// Now we slice the remaining bytes into quads, create leaves from them
// and push them into the leaf layer.
let readOffset = bytesRequired
// Rest of the bytes are also sliced into quads and
while (readOffset + IN_BYTES_PER_QUAD < length) {
const quad = bytes.subarray(readOffset, readOffset + IN_BYTES_PER_QUAD)
leaves.push(...split(pad(quad)))
readOffset += IN_BYTES_PER_QUAD
}

// Remaining bytes are copied into the buffer
// Whatever bytes were left are copied into the buffer and we update
// the offset to reflect that.
this.buffer.set(bytes.subarray(readOffset), 0)
this.offset = length - readOffset

this.flush()

// We also update the total number of bytes written.
this.bytesWritten += BigInt(length)

// Now prune the layers to propagate all the new leaves up the tree.
prune(this.layers)

return this
}
}
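
To make the buffering rule above concrete, here is a sketch of the size arithmetic. The value of `IN_BYTES_PER_QUAD` (127 payload bytes, padding to 128, i.e. four 32-byte leaves) is an assumption based on typical fr32 constants, not confirmed by this diff:

```js
const IN_BYTES_PER_QUAD = 127 // assumed fr32 constant

// Given `buffered` bytes already in the quad buffer and `incoming` new bytes,
// compute how many full quads become leaves and how many bytes stay buffered.
const plan = (buffered, incoming) => {
  const total = buffered + incoming
  return {
    quads: Math.floor(total / IN_BYTES_PER_QUAD), // each quad yields 4 leaves
    leftover: total % IN_BYTES_PER_QUAD,          // stays in the buffer
  }
}

plan(0, 100)  // => { quads: 0, leftover: 100 } — buffered, no leaves yet
plan(100, 54) // => { quads: 1, leftover: 27 }  — one quad flushed into leaves
```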
flush() {
const { layers } = this
let height = 0
while (height < layers.length) {
const layer = layers[height]
height += 1
let index = 0
while (index + 1 < layer.length) {
const node = Proof.computeNode(layer[index], layer[index + 1])
if (this.layers.length <= height) {
this.layers[height] = [node]
} else {
this.layers[height].push(node)
}
index += 2
}
layer.splice(0, index)
}
}

/**
* Resets this hasher to its initial state so it can be recycled as a new
* instance.
*/
reset() {
this.offset = 0
this.bytesWritten = 0n
this.layers = [[]]
this.layers.length = 1
this.layers[0].length = 0
return this
}
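
A small sketch of recycling a hasher via `reset`; the payload sizes are placeholders:

```js
const hasher = create()
hasher.write(new Uint8Array(256)).digest()

hasher.reset() // count, buffer offset, and layers all return to initial state
hasher.write(new Uint8Array(512)) // behaves exactly like a fresh hasher
```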

@@ -260,45 +289,73 @@ class Digest {
}

/**
* Prunes layers by combining node pairs into nodes in the next layer and
* removing them from the layer they were in. After pruning, each layer will
* end up with at most one node. New layers may be created in the process
* when nodes from the top layer are combined.
*
* @param {Layers} layers
*/
const prune = (layers) => flush(layers, false)

/**
* Flushes all the nodes in the layers by combining node pairs into nodes in
* the next layer. Layers with only one node are combined with zero-padded
* nodes (corresponding to the level of the layer). Unlike {@link prune},
* combined nodes are not removed and layers are copied instead of being
* mutated.
*
* @param {Layers} layers
* @param {API.MerkleTreeNode[]} [newNodes]
*/
const computedRoot = (layers, newNodes = []) => {
const build = (layers) => flush([...layers], true)

/**
* @param {Layers} layers
* @param {boolean} build
* @returns {Layers}
*/
const flush = (layers, build) => {
// Note it is important that we do not mutate any of the layers otherwise
// calling digest() will have a side-effect and produce wrong results.
let height = 0
while (height < layers.length || newNodes.length > 1) {
const layer = layers[height] ?? []
const nodes = newNodes.length ? [...layer, ...newNodes] : layer
// We already copied the nodes from the previous layer so we can clear it
// here in order to accumulate the new nodes for the next layer.
newNodes.length = 0
// writing more data into the hasher and computing the digest will produce
// wrong results.
let level = 0
// We will walk up the tree until we reach the top layer. However, we may
// end up creating new layers in the process, so we keep track of the
// current level as we go.
while (level < layers.length) {
let next = layers[level + 1]
const layer = layers[level]

// If we have an odd number of nodes and we have not reached the top
// layer, we have a bug in the code and we throw an error.
if (nodes.length % 2 > 0 && height + 1 < layers.length) {
nodes.push(ZeroPad.fromLevel(height))
// layer, we push a zero padding node corresponding to the current level.
if (build && layer.length % 2 > 0 && next) {
layer.push(ZeroPad.fromLevel(level))
}

// If we have 0 nodes in the current layer we just move up the tree.
if (nodes.length === 0) {
height += 1
} else {
level += 1

// If we have 0 nodes in the current layer we just move to the next one.
if (layer.length) {
// If we are building we copy the next layer (creating it if missing) so
// that we can push combined nodes without mutating the original layers.
next = next ? (build ? [...next] : next) : []
let index = 0
// Note that we have checked that we have an even number of nodes so
// we will never end up with an extra node when consuming two at a time.
while (index + 1 < nodes.length) {
const left = nodes[index]
const right = nodes[index + 1]
const node = Proof.computeNode(left, right)
newNodes.push(node)
while (index + 1 < layer.length) {
const node = Proof.computeNode(layer[index], layer[index + 1])
next.push(node)
index += 2
}
height += 1

if (next.length) {
layers[level] = next
}

if (!build) {
// we remove nodes that we have combined from the current layer to reduce
// memory overhead and move to the next layer.
layer.splice(0, index)
}
}
}

return newNodes.length
? { root: newNodes[0], height }
: { root: layers[layers.length - 1][0], height: height - 1 }
return layers
}
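
To illustrate how `build` climbs the layers, here is a conceptual trace on four leaves; `H` stands for `Proof.computeNode`, and the sketch is illustrative rather than a transcript of the code:

```js
// layer 0: [L0, L1, L2, L3]
// layer 1: [H(L0,L1), H(L2,L3)]
// layer 2: [H(H(L0,L1), H(L2,L3))]   <- single root; tree height = 2
//
// When a layer with an odd node count sits below an existing upper layer,
// a ZeroPad.fromLevel(level) node is appended before pairing, as the
// padding branch above does.
```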