Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add a sha256 implementation which is optimized for unconstrained runtime #9

Merged
merged 19 commits into from
Feb 4, 2025
8 changes: 5 additions & 3 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ jobs:

- name: Install bb
run: |
npm install -g bbup
bbup -nv 0.36.0
curl -L https://raw.githubusercontent.com/AztecProtocol/aztec-packages/refs/heads/master/barretenberg/bbup/install | bash
echo "$HOME/.bb/" >> $GITHUB_PATH
~/.bb/bbup -nv 0.36.0
sudo apt install -y libc++-dev

- name: Build Noir benchmark programs
run: nargo export
Expand All @@ -34,7 +36,7 @@ jobs:

- name: Compare gates reports
id: gates_diff
uses: noir-lang/noir-gates-diff@7e4ddaa91c69380f15ccba514eac17bc7432a8cc
uses: noir-lang/noir-gates-diff@dbe920a8dcc3370af4be4f702ca9cef29317bec1
with:
report: gates_report.json
summaryQuantile: 0.9 # only display the 10% most significant circuit size diffs in the summary (defaults to 20%)
Expand Down
1 change: 1 addition & 0 deletions src/lib.nr
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ mod sha256;

pub use sha256::digest;
pub use sha256::sha256_var;

223 changes: 113 additions & 110 deletions src/sha256.nr
Original file line number Diff line number Diff line change
@@ -1,51 +1,17 @@
use std::hash::sha256_compression;
use std::runtime::is_unconstrained;

use constants::{
BLOCK_BYTE_PTR, BLOCK_SIZE, HASH, INITIAL_STATE, INT_BLOCK, INT_BLOCK_SIZE, INT_SIZE,
INT_SIZE_PTR, MSG_BLOCK, MSG_SIZE_PTR, STATE, TWO_POW_16, TWO_POW_24, TWO_POW_32, TWO_POW_8,
};

mod constants;
mod tests;

// Implementation of SHA-256 mapping a byte array of variable length to
// 32 bytes.

// A message block is up to 64 bytes taken from the input.
global BLOCK_SIZE: u32 = 64;

// The first index in the block where the 8 byte message size will be written.
global MSG_SIZE_PTR: u32 = 56;

// Size of the message block when packed as 4-byte integer array.
global INT_BLOCK_SIZE: u32 = 16;

// A `u32` integer consists of 4 bytes.
global INT_SIZE: u32 = 4;

// Index of the integer in the `INT_BLOCK` where the length is written.
global INT_SIZE_PTR: u32 = MSG_SIZE_PTR / INT_SIZE;

// Magic numbers for bit shifting.
// Actual bit shifting would work as well, since the compiler turns shifts into * and /,
// but circuit execution appears to be 10% faster this way.
global TWO_POW_8: u32 = 256;
global TWO_POW_16: u32 = TWO_POW_8 * 256;
global TWO_POW_24: u32 = TWO_POW_16 * 256;
global TWO_POW_32: u64 = TWO_POW_24 as u64 * 256;

// Index of a byte in a 64 byte block; i.e. 0..=63
type BLOCK_BYTE_PTR = u32;

// The foreign function to compress blocks works on 16 pieces of 4-byte integers, instead of 64 bytes.
type INT_BLOCK = [u32; INT_BLOCK_SIZE];

// A message block is a slice of the original message of a fixed size,
// potentially padded with zeros, with neighbouring 4 bytes packed into integers.
type MSG_BLOCK = INT_BLOCK;

// The hash is 32 bytes.
type HASH = [u8; 32];

// The state accumulates the blocks.
// Its overall size is the same as the `HASH`.
type STATE = [u32; 8];

// Deprecated in favour of `sha256_var`
// docs:start:sha256
pub fn sha256<let N: u32>(input: [u8; N]) -> HASH
Expand All @@ -63,94 +29,132 @@ pub fn digest<let N: u32>(msg: [u8; N]) -> HASH {
// Variable size SHA-256 hash
pub fn sha256_var<let N: u32>(msg: [u8; N], message_size: u64) -> HASH {
let message_size = message_size as u32;
let num_blocks = N / BLOCK_SIZE;
let mut msg_block: MSG_BLOCK = [0; INT_BLOCK_SIZE];
// Intermediate hash, starting with the canonical initial value
let mut h: STATE = [
1779033703, 3144134277, 1013904242, 2773480762, 1359893119, 2600822924, 528734635,
1541459225,
];
// Pointer into msg_block on a 64 byte scale
let mut msg_byte_ptr = 0;
for i in 0..num_blocks {
let msg_start = BLOCK_SIZE * i;
let (new_msg_block, new_msg_byte_ptr) =
unsafe { build_msg_block(msg, message_size, msg_start) };
assert(message_size <= N);

if msg_start < message_size {
msg_block = new_msg_block;
if std::runtime::is_unconstrained() {
// Safety: SHA256 is running as an unconstrained function.
TomAFrench marked this conversation as resolved.
Show resolved Hide resolved
unsafe {
__sha256_var(msg, message_size)
}
} else {
let mut msg_block: MSG_BLOCK = [0; INT_BLOCK_SIZE];
// Intermediate hash, starting with the canonical initial value
let mut h: STATE = INITIAL_STATE;
// Pointer into msg_block on a 64 byte scale
let mut msg_byte_ptr = 0;
let num_blocks = N / BLOCK_SIZE;
for i in 0..num_blocks {
let msg_start = BLOCK_SIZE * i;
let (new_msg_block, new_msg_byte_ptr) =
unsafe { build_msg_block(msg, message_size, msg_start) };

if msg_start < message_size {
msg_block = new_msg_block;
}

if !is_unconstrained() {
// Verify the block we are compressing was appropriately constructed
let new_msg_byte_ptr = verify_msg_block(msg, message_size, msg_block, msg_start);
if msg_start < message_size {
msg_byte_ptr = new_msg_byte_ptr;
}
} else if msg_start < message_size {
msg_byte_ptr = new_msg_byte_ptr;
}

// If the block is filled, compress it.
// An un-filled block is handled after this loop.
if (msg_start < message_size) & (msg_byte_ptr == BLOCK_SIZE) {
h = sha256_compression(msg_block, h);
// If the block is filled, compress it.
// An un-filled block is handled after this loop.
if (msg_start < message_size) & (msg_byte_ptr == BLOCK_SIZE) {
h = sha256_compression(msg_block, h);
}
}
}

let modulo = N % BLOCK_SIZE;
// Handle setup of the final msg block.
// This case is only hit if the msg is less than the block size,
// or our message cannot be evenly split into blocks.
if modulo != 0 {
let msg_start = BLOCK_SIZE * num_blocks;
let (new_msg_block, new_msg_byte_ptr) =
unsafe { build_msg_block(msg, message_size, msg_start) };
let modulo = N % BLOCK_SIZE;
// Handle setup of the final msg block.
// This case is only hit if the msg is less than the block size,
// or our message cannot be evenly split into blocks.
if modulo != 0 {
let msg_start = BLOCK_SIZE * num_blocks;
let (new_msg_block, new_msg_byte_ptr) =
unsafe { build_msg_block(msg, message_size, msg_start) };

if msg_start < message_size {
msg_block = new_msg_block;
}
if msg_start < message_size {
msg_block = new_msg_block;
}
TomAFrench marked this conversation as resolved.
Show resolved Hide resolved

if !is_unconstrained() {
let new_msg_byte_ptr = verify_msg_block(msg, message_size, msg_block, msg_start);
if msg_start < message_size {
msg_byte_ptr = new_msg_byte_ptr;
verify_msg_block_padding(msg_block, msg_byte_ptr);
}
} else if msg_start < message_size {
msg_byte_ptr = new_msg_byte_ptr;
}

// If we had modulo == 0 then it means the last block was full,
// and we can reset the pointer to zero to overwrite it.
if msg_byte_ptr == BLOCK_SIZE {
msg_byte_ptr = 0;
}

// Pad the rest such that we have a [u32; 2] block at the end representing the length
// of the message, and a block of 1 0 ... 0 following the message (i.e. [1 << 7, 0, ..., 0]).
// Here we rely on the fact that everything beyond the available input is set to 0.
let index = msg_byte_ptr / INT_SIZE;
TomAFrench marked this conversation as resolved.
Show resolved Hide resolved
msg_block[index] = set_item_byte_then_zeros(msg_block[index], msg_byte_ptr, 1 << 7);

msg_byte_ptr = msg_byte_ptr + 1;
let last_block = msg_block;

// If we don't have room to write the size, compress the block and reset it.
if msg_byte_ptr > MSG_SIZE_PTR {
h = sha256_compression(msg_block, h);
// `attach_len_to_msg_block` will zero out everything after the `msg_byte_ptr`.
msg_byte_ptr = 0;
}

msg_block = unsafe { attach_len_to_msg_block(msg_block, msg_byte_ptr, message_size) };

verify_msg_len(msg_block, last_block, msg_byte_ptr, message_size);

hash_final_block(msg_block, h)
}
}

// If we had modulo == 0 then it means the last block was full,
// and we can reset the pointer to zero to overwrite it.
if msg_byte_ptr == BLOCK_SIZE {
msg_byte_ptr = 0;
// Variable size SHA-256 hash
unconstrained fn __sha256_var<let N: u32>(msg: [u8; N], message_size: u32) -> HASH {
let num_full_blocks = message_size / BLOCK_SIZE;
// Intermediate hash, starting with the canonical initial value
let mut h: STATE = INITIAL_STATE;
// Pointer into msg_block on a 64 byte scale
for i in 0..num_full_blocks {
let (msg_block, _) = build_msg_block(msg, message_size, BLOCK_SIZE * i);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we don't use the msg_byte_ptr value, we can check to make sure that we're not unnecessarily calculating this value in build_msg_block for this case.

h = sha256_compression(msg_block, h);
}
TomAFrench marked this conversation as resolved.
Show resolved Hide resolved

// Handle setup of the final msg block.
// This case is only hit if the msg is less than the block size,
// or our message cannot be evenly split into blocks.
let modulo = message_size % BLOCK_SIZE;
TomAFrench marked this conversation as resolved.
Show resolved Hide resolved
let (mut msg_block, mut msg_byte_ptr): (INT_BLOCK, u32) = if modulo != 0 {
let msg_start = BLOCK_SIZE * num_full_blocks;
let (new_msg_block, new_msg_byte_ptr) = build_msg_block(msg, message_size, msg_start);

(new_msg_block, new_msg_byte_ptr)
} else {
// If we had modulo == 0 then it means the last block was full,
// and we can reset the pointer to zero to overwrite it.
([0; INT_BLOCK_SIZE], 0)
};

// Pad the rest such that we have a [u32; 2] block at the end representing the length
// of the message, and a block of 1 0 ... 0 following the message (i.e. [1 << 7, 0, ..., 0]).
// Here we rely on the fact that everything beyond the available input is set to 0.
msg_block = update_block_item(
msg_block,
msg_byte_ptr,
|msg_item| set_item_byte_then_zeros(msg_item, msg_byte_ptr, 1 << 7),
);
msg_byte_ptr = msg_byte_ptr + 1;
let last_block = msg_block;
let index = msg_byte_ptr / INT_SIZE;
msg_block[index] = set_item_byte_then_zeros(msg_block[index], msg_byte_ptr, 1 << 7);

// If we don't have room to write the size, compress the block and reset it.
if msg_byte_ptr > MSG_SIZE_PTR {
h = sha256_compression(msg_block, h);
let (h, mut msg_byte_ptr): (STATE, u32) = if msg_byte_ptr >= MSG_SIZE_PTR {
// `attach_len_to_msg_block` will zero out everything after the `msg_byte_ptr`.
msg_byte_ptr = 0;
}

msg_block = unsafe { attach_len_to_msg_block(msg_block, msg_byte_ptr, message_size) };

if !is_unconstrained() {
verify_msg_len(msg_block, last_block, msg_byte_ptr, message_size);
}
(sha256_compression(msg_block, h), 0)
} else {
(h, msg_byte_ptr + 1)
};
msg_block = attach_len_to_msg_block(msg_block, msg_byte_ptr, message_size);

hash_final_block(msg_block, h)
}
Expand Down Expand Up @@ -332,17 +336,6 @@ fn verify_msg_block_equals_last(
}
}

// Replace the block item that the byte pointer falls into with the result of
// applying `f` to that item's current value, and return the updated block.
fn update_block_item<Env>(
    mut msg_block: MSG_BLOCK,
    msg_byte_ptr: BLOCK_BYTE_PTR,
    f: fn[Env](u32) -> u32,
) -> MSG_BLOCK {
    // Each item packs `INT_SIZE` bytes, so integer division maps a byte pointer to its item.
    let item_index = msg_byte_ptr / INT_SIZE;
    let updated_item = f(msg_block[item_index]);
    msg_block[item_index] = updated_item;
    msg_block
}

// Set the rightmost `zeros` number of bytes to 0.
fn set_item_zeros(item: u32, zeros: u8) -> u32 {
lshift8(rshift8(item, zeros), zeros)
Expand Down Expand Up @@ -517,3 +510,13 @@ fn hash_final_block(msg_block: MSG_BLOCK, mut state: STATE) -> HASH {
out_h
}

mod equivalence_test {

    // Checks that `sha256_var` and the unconstrained `__sha256_var` produce the same
    // digest for the same 100-byte input and message size.
    #[test]
    fn test_implementations_agree(msg: [u8; 100], message_size: u64) {
        // Reduce the size modulo 100 so that it never exceeds the length of `msg`.
        let bounded_size = message_size % 100;
        // Safety: the unconstrained implementation is called directly only so that its
        // output can be compared against `sha256_var` below.
        let unconstrained_digest = unsafe { super::__sha256_var(msg, bounded_size as u32) };
        let digest = super::sha256_var(msg, bounded_size);
        assert_eq(digest, unconstrained_digest);
    }
}
42 changes: 42 additions & 0 deletions src/sha256/constants.nr
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// A message block is up to 64 bytes taken from the input.
pub(crate) global BLOCK_SIZE: u32 = 64;

// The first index in the block where the 8 byte message size will be written.
// The size therefore occupies the last 8 bytes (56..=63) of a 64 byte block.
pub(crate) global MSG_SIZE_PTR: u32 = 56;

// Size of the message block when packed as an array of 4-byte integers (64 / 4).
pub(crate) global INT_BLOCK_SIZE: u32 = 16;

// A `u32` integer consists of 4 bytes.
pub(crate) global INT_SIZE: u32 = 4;

// Index of the integer in the `INT_BLOCK` where the length is written (56 / 4 = 14).
pub(crate) global INT_SIZE_PTR: u32 = MSG_SIZE_PTR / INT_SIZE;

// Magic numbers for bit shifting.
// Actual bit shifting would work as well, since the compiler turns shifts into * and /,
// but circuit execution appears to be 10% faster this way.
pub(crate) global TWO_POW_8: u32 = 256;
pub(crate) global TWO_POW_16: u32 = TWO_POW_8 * 256;
pub(crate) global TWO_POW_24: u32 = TWO_POW_16 * 256;
// 2^32 does not fit into a `u32`, hence the widening to `u64` before the last multiplication.
pub(crate) global TWO_POW_32: u64 = TWO_POW_24 as u64 * 256;

// Index of a byte in a 64 byte block; i.e. 0..=63
pub(crate) type BLOCK_BYTE_PTR = u32;

// The foreign function to compress blocks works on 16 pieces of 4-byte integers, instead of 64 bytes.
pub(crate) type INT_BLOCK = [u32; INT_BLOCK_SIZE];

// A message block is a slice of the original message of a fixed size,
// potentially padded with zeros, with neighbouring 4 bytes packed into integers.
pub(crate) type MSG_BLOCK = INT_BLOCK;

// The hash is 32 bytes.
pub(crate) type HASH = [u8; 32];

// The state accumulates the blocks.
// Its overall size is the same as the `HASH` (8 integers of 4 bytes each).
pub(crate) type STATE = [u32; 8];

// The canonical initial value of the intermediate hash state, written in decimal.
// In hex: 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
//         0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19.
pub(crate) global INITIAL_STATE: STATE =
[1779033703, 3144134277, 1013904242, 2773480762, 1359893119, 2600822924, 528734635, 1541459225];
3 changes: 2 additions & 1 deletion src/sha256/tests.nr
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use super::{
attach_len_to_msg_block, build_msg_block, byte_into_item, get_item_byte, INT_BLOCK, make_item,
attach_len_to_msg_block, build_msg_block, byte_into_item, get_item_byte, make_item,
set_item_byte_then_zeros, set_item_zeros, sha256, sha256_var,
};
use super::constants::INT_BLOCK;

#[export]
fn test_sha256_1(input: [u8; 1], len: u64) -> [u8; 32] {
Expand Down