Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Poseidon2 half output #2514

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
10 changes: 5 additions & 5 deletions plonky3/src/params/poseidon2/goldilocks/powdr_accel_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use p3_field::AbstractField;
use p3_goldilocks::Goldilocks;
use p3_symmetric::CryptographicPermutation;
use powdr_riscv_runtime::{
goldilocks::{extract_opaque_vec8, Goldilocks as PowdrGoldilocks, OpaqueGoldilocks},
hash::{poseidon2_gl, poseidon2_gl_inplace},
goldilocks::{extract_opaque_vec, Goldilocks as PowdrGoldilocks, OpaqueGoldilocks},
hash::poseidon2_gl_inplace,
};

#[derive(Clone, Copy, Debug)]
Expand All @@ -21,10 +21,10 @@ impl p3_symmetric::Permutation<[Goldilocks; 8]> for Permutation {
// canonical representation internally, so it is safe to cast between their
// array's pointers.
let input = unsafe { &*(&input as *const _ as *const [PowdrGoldilocks; 8]) };
let input = input.map(|x| OpaqueGoldilocks::from(x));
let output = poseidon2_gl(&input);
let mut state = input.map(|x| OpaqueGoldilocks::from(x));
poseidon2_gl_inplace(&mut state);

extract_opaque_vec8(&output).map(|x| Goldilocks::from_canonical_u64(x))
extract_opaque_vec::<8>(&state).map(|x| Goldilocks::from_canonical_u64(x))
}

fn permute_mut(&self, data: &mut [Goldilocks; 8]) {
Expand Down
11 changes: 10 additions & 1 deletion riscv-executor/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2438,7 +2438,16 @@ impl<F: FieldElement> Executor<'_, '_, F> {
.try_into()
.unwrap();

let output_half = self.proc.get_reg_mem(args[2].u()).u();

let result = poseidon2_gl::poseidon2_gl(&inputs);
let result = match output_half {
0 => &result[0..0],
1 => &result[0..4],
2 => &result[4..8],
3 => &result[0..8],
_ => unreachable!(),
};

let output_ptr = self.proc.get_reg_mem(args[1].u()).u();
assert!(is_multiple_of_4(output_ptr));
Expand Down Expand Up @@ -2668,7 +2677,7 @@ impl<F: FieldElement> Executor<'_, '_, F> {
let output_ptr = self.proc.get_reg_mem(args[1].u()).u();
assert!(is_multiple_of_4(output_ptr));

let result = (0..8)
let result = (0..4)
.flat_map(|i| {
let v = self.proc.get_mem(input_ptr + i * 4, 0, 0).into_fe();
let v = v.to_integer().try_into_u64().unwrap();
Expand Down
10 changes: 8 additions & 2 deletions riscv-runtime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,14 @@ homepage = "https://powdr.org"
repository = "https://github.com/powdr-labs/powdr"

[dependencies]
serde = { version = "1.0", default-features = false, features = ["alloc", "derive", "rc"] }
serde_cbor = { version = "0.11.2", default-features = false, features = ["alloc"] }
serde = { version = "1.0", default-features = false, features = [
"alloc",
"derive",
"rc",
] }
serde_cbor = { version = "0.11.2", default-features = false, features = [
"alloc",
] }
powdr-riscv-syscalls = { path = "../riscv-syscalls", version = "0.1.4" }
getrandom = { version = "0.2", features = ["custom"], optional = true }
spin = "0.9"
Expand Down
20 changes: 17 additions & 3 deletions riscv-runtime/src/goldilocks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,24 @@ impl From<Goldilocks> for OpaqueGoldilocks {
}

/// Extract the Goldilocks values from the OpaqueGoldilocks values.
pub fn extract_opaque_vec8(vec: &[OpaqueGoldilocks; 8]) -> [u64; 8] {
///
/// The array size must be a multiple of 4.
pub fn extract_opaque_vec<const N: usize>(vec: &[OpaqueGoldilocks; N]) -> [u64; N] {
assert_eq!(N % 4, 0);
unsafe {
let mut output: MaybeUninit<[u64; 8]> = MaybeUninit::uninit();
ecall!(Syscall::SplitGLVec, in("a0") vec, in("a1") output.as_mut_ptr());
let mut output: MaybeUninit<[u64; N]> = MaybeUninit::uninit();

let input_ptr = vec.as_ptr();
let output_ptr = (*output.as_mut_ptr()).as_mut_ptr();

for i in 0..(N / 4) {
ecall!(
Syscall::SplitGLVec,
in("a0") input_ptr.add(i * 4),
in("a1") output_ptr.add(i * 4)
);
}

output.assume_init()
}
}
Expand Down
21 changes: 16 additions & 5 deletions riscv-runtime/src/hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,26 @@ pub fn poseidon_gl(data: &mut [Goldilocks; 12]) -> &[Goldilocks; 4] {
/// Perform one Poseidon2 permutation with 8 Goldilocks field elements in-place.
pub fn poseidon2_gl_inplace(data: &mut [OpaqueGoldilocks; 8]) {
unsafe {
ecall!(Syscall::Poseidon2GL, in("a0") data, in("a1") data);
ecall!(Syscall::Poseidon2GL, in("a0") data, in("a1") data, in("a2") 3);
}
}

/// Perform one Poseidon2 permutation with 8 Goldilocks field elements.
pub fn poseidon2_gl(data: &[OpaqueGoldilocks; 8]) -> [OpaqueGoldilocks; 8] {
/// Selects which half of the 8-element Poseidon2 state is written as output
/// by `poseidon2_gl_compression`.
///
/// The discriminant is cast to `u32` and passed to the `Poseidon2GL` syscall
/// in register `a2`.
#[repr(u32)]
pub enum Poseidon2OutputHalf {
// Value 0 (no output) is understood by the syscall but not exposed here.
//None = 0,
/// Output elements 0..4 of the final state.
FirstHalf = 1,
/// Output elements 4..8 of the final state.
SecondHalf = 2,
// Value 3 (the entire state) is what `poseidon2_gl_inplace` passes directly.
//Full = 3,
}
Comment on lines +35 to +40
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pub enum Poseidon2OutputHalf {
//None = 0,
FirstHalf = 1,
SecondHalf = 2,
//Full = 3,
}
pub enum Poseidon2OutputHalf {
FirstHalf = 1,
SecondHalf = 2,
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These comments act as a sort of documentation... do you really want them gone?


/// Perform one Poseidon2 compression with 8 Goldilocks field elements.
pub fn poseidon2_gl_compression(
data: &[OpaqueGoldilocks; 8],
output_half: Poseidon2OutputHalf,
) -> [OpaqueGoldilocks; 4] {
unsafe {
let mut output: MaybeUninit<[OpaqueGoldilocks; 8]> = MaybeUninit::uninit();
ecall!(Syscall::Poseidon2GL, in("a0") data, in("a1") output.as_mut_ptr());
let mut output: MaybeUninit<[OpaqueGoldilocks; 4]> = MaybeUninit::uninit();
ecall!(Syscall::Poseidon2GL, in("a0") data, in("a1") output.as_mut_ptr(), in("a2") output_half as u32);
output.assume_init()
}
}
Expand Down
11 changes: 6 additions & 5 deletions riscv/src/large_field/runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,10 +277,11 @@ impl Runtime {
None,
"poseidon2_gl",
vec!["memory", "MIN_DEGREE", "LARGE_SUBMACHINES_MAX_DEGREE"],
[r#"instr poseidon2_gl X, Y
[r#"instr poseidon2_gl X, Y, Z
link ~> tmp1_col = regs.mload(X, STEP)
link ~> tmp2_col = regs.mload(Y, STEP + 1)
link ~> poseidon2_gl.poseidon2_permutation(tmp1_col, tmp2_col, STEP)
link ~> tmp3_col = regs.mload(Z, STEP + 2)
link ~> poseidon2_gl.permute(tmp1_col, STEP, tmp2_col, STEP + 1, tmp3_col)
{
// make sure tmp1_col and tmp2_col are 4-byte aligned memory addresses
tmp1_col = 4 * (X_b1 + X_b2 * 0x100 + X_b3 * 0x10000 + X_b4 * 0x1000000),
Expand All @@ -290,15 +291,15 @@ impl Runtime {
);

self.add_submachine(
"std::machines::split::split_gl_vec::SplitGLVec8",
"std::machines::split::split_gl_vec::SplitGLVec4",
None,
"split_gl_vec",
vec!["memory", "split_gl", "MIN_DEGREE", "MAIN_MAX_DEGREE"],
[
r#"instr split_gl_vec X, Y
link ~> tmp1_col = regs.mload(X, STEP)
link ~> tmp2_col = regs.mload(Y, STEP + 1)
link ~> split_gl_vec.split(tmp1_col, tmp2_col, STEP + 2)
link ~> split_gl_vec.split(tmp1_col, tmp2_col, STEP)
{
// make sure tmp1_col and tmp2_col are 4-byte aligned memory addresses
tmp1_col = 4 * (X_b1 + X_b2 * 0x100 + X_b3 * 0x10000 + X_b4 * 0x1000000),
Expand Down Expand Up @@ -331,7 +332,7 @@ impl Runtime {
// they can overlap.
self.add_syscall(
Syscall::Poseidon2GL,
std::iter::once("poseidon2_gl 10, 11;".to_string()),
std::iter::once(format!("{} 10, 11, 12;", Syscall::Poseidon2GL.name())),
);

self.add_syscall(
Expand Down
49 changes: 22 additions & 27 deletions riscv/tests/riscv_data/poseidon2_gl_via_coprocessor/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,46 +2,44 @@
#![no_std]

use powdr_riscv_runtime::{
goldilocks::{extract_opaque_vec8, Goldilocks, OpaqueGoldilocks, PRIME},
hash::{poseidon2_gl, poseidon2_gl_inplace},
goldilocks::{extract_opaque_vec, Goldilocks, OpaqueGoldilocks, PRIME},
hash::{poseidon2_gl_compression, poseidon2_gl_inplace, Poseidon2OutputHalf},
};

#[no_mangle]
fn main() {
let i = [OpaqueGoldilocks::from(0); 8];
let h = extract_opaque_vec8(&poseidon2_gl(&i));
let h = extract_opaque_vec::<4>(&poseidon2_gl_compression(
&i,
Poseidon2OutputHalf::FirstHalf,
));
assert_eq!(h[0], 14905565590733827480);
assert_eq!(h[1], 640905753703258831);
assert_eq!(h[2], 4579128623722792381);
assert_eq!(h[3], 158153743058056413);
assert_eq!(h[4], 5905145432652609062);
assert_eq!(h[5], 9814446752588696081);
assert_eq!(h[6], 13759450385053274731);
assert_eq!(h[7], 2402148582355896469);

let i = [OpaqueGoldilocks::from(1); 8];
let h = extract_opaque_vec8(&poseidon2_gl(&i));
assert_eq!(h[0], 18201552556563266798);
assert_eq!(h[1], 6814935789744812745);
assert_eq!(h[2], 5947349602629011250);
assert_eq!(h[3], 15482468195247053191);
assert_eq!(h[4], 2971437633000883992);
assert_eq!(h[5], 9752341516515962403);
assert_eq!(h[6], 15477293561177957600);
assert_eq!(h[7], 13574628582471329853);
let h = extract_opaque_vec::<4>(&poseidon2_gl_compression(
&i,
Poseidon2OutputHalf::SecondHalf,
));
assert_eq!(h[0], 2971437633000883992);
assert_eq!(h[1], 9752341516515962403);
assert_eq!(h[2], 15477293561177957600);
assert_eq!(h[3], 13574628582471329853);

let minus_one = PRIME - 1;
let i = [OpaqueGoldilocks::from(Goldilocks::new(minus_one)); 8];
let h = extract_opaque_vec8(&poseidon2_gl(&i));
let h = extract_opaque_vec::<4>(&poseidon2_gl_compression(
&i,
Poseidon2OutputHalf::FirstHalf,
));
assert_eq!(h[0], 13601391594672984423);
assert_eq!(h[1], 7799837486760213030);
assert_eq!(h[2], 4721195013230721931);
assert_eq!(h[3], 6190752424007146655);
assert_eq!(h[4], 5006958669091947377);
assert_eq!(h[5], 716937639216173272);
assert_eq!(h[6], 10656923966581845557);
assert_eq!(h[7], 6633446230068695780);

// Also test the inplace version
let mut i = [
923978,
235763497586,
Expand All @@ -53,7 +51,9 @@ fn main() {
2087,
]
.map(|x| OpaqueGoldilocks::from(Goldilocks::new(x)));
let h = extract_opaque_vec8(&poseidon2_gl(&i));
poseidon2_gl_inplace(&mut i);

let h = extract_opaque_vec::<8>(&i);
assert_eq!(h[0], 14498150941209346562);
assert_eq!(h[1], 8038616707062714447);
assert_eq!(h[2], 17242548914990530484);
Expand All @@ -62,9 +62,4 @@ fn main() {
assert_eq!(h[5], 12505236434419724338);
assert_eq!(h[6], 3134668969942435695);
assert_eq!(h[7], 1912726109528180442);

// Also test the inplace version
poseidon2_gl_inplace(&mut i);
let h_inplace = extract_opaque_vec8(&i);
assert_eq!(h, h_inplace);
}
75 changes: 49 additions & 26 deletions std/machines/hash/poseidon2_gl.asm
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::array;
use std::check::assert;
use std::utils::unchanged_until;
use std::utils::new_bool;
use std::utils::force_bool;
use std::utils::sum;
use std::convert::expr;
Expand All @@ -19,8 +20,8 @@ use super::poseidon2_common::poseidon2;
// state size of 8 field elements instead of 12, matching Plonky3's implementation.
//
// This machine assumes each memory word contains a full field element, and it
// writes one field element per memory word. Use SplitGLVec8 to split the output
// into 32-bit words.
// writes one field element per memory word. Use SplitGLVec4 to split the output
// into 32-bit words.
machine Poseidon2GL(mem: Memory) with
latch: latch,
// Allow this machine to be connected via a permutation
Expand All @@ -33,19 +34,32 @@ machine Poseidon2GL(mem: Memory) with
// The input data is passed via a memory pointer: the machine will read STATE_SIZE
// field elements from memory.
//
// Similarly, the output data is written to memory at the provided pointer.
// Similarly, the output data is written to memory at the provided pointer. We don't
// have any use for writing the full state as output, so depending on the operation,
// it will either write the first half of the state (used in sponge squeeze) or the
// second half (used in sponge absorb and in Merkle tree compression).
//
// Reads happen at the provided time step; writes happen at the next time step.
// Memory reads happen at input_time_step and memory writes happen at output_time_step.
//
// The addresses must be multiples of 4.
operation poseidon2_permutation
//
// This operation can output any combination of the first and second half of the final
// state, depending on the value of output_halves:
// 0: no output
// 1: first half
// 2: second half
// 3: the entire state
operation permute
input_addr,
input_time_step,
output_addr,
time_step ->;
output_time_step,
output_halves ->;

let latch = 1;

let time_step;
let input_time_step;
let output_time_step;

// Poseidon2 parameters, compatible with our powdr-plonky3 implementation.
//
Expand Down Expand Up @@ -120,14 +134,14 @@ machine Poseidon2GL(mem: Memory) with
let input: col[STATE_SIZE];

// TODO: when link is available inside functions, we can turn this into array operations.
link if is_used ~> input[0] = mem.mload(input_addr + 0, time_step);
link if is_used ~> input[1] = mem.mload(input_addr + 4, time_step);
link if is_used ~> input[2] = mem.mload(input_addr + 8, time_step);
link if is_used ~> input[3] = mem.mload(input_addr + 12, time_step);
link if is_used ~> input[4] = mem.mload(input_addr + 16, time_step);
link if is_used ~> input[5] = mem.mload(input_addr + 20, time_step);
link if is_used ~> input[6] = mem.mload(input_addr + 24, time_step);
link if is_used ~> input[7] = mem.mload(input_addr + 28, time_step);
link if is_used ~> input[0] = mem.mload(input_addr + 0, input_time_step);
link if is_used ~> input[1] = mem.mload(input_addr + 4, input_time_step);
link if is_used ~> input[2] = mem.mload(input_addr + 8, input_time_step);
link if is_used ~> input[3] = mem.mload(input_addr + 12, input_time_step);
link if is_used ~> input[4] = mem.mload(input_addr + 16, input_time_step);
link if is_used ~> input[5] = mem.mload(input_addr + 20, input_time_step);
link if is_used ~> input[6] = mem.mload(input_addr + 24, input_time_step);
link if is_used ~> input[7] = mem.mload(input_addr + 28, input_time_step);

// Generate the Poseidon2 permutation
let output = poseidon2(
Expand All @@ -143,16 +157,25 @@ machine Poseidon2GL(mem: Memory) with
input,
);

// Write the output to memory at the next time step
let output_addr;
// Decide which halves to output:
let output_halves;
let output_first_half = new_bool();
let output_second_half = new_bool();
output_halves = output_first_half + 2 * output_second_half;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need support for both and none? I think it's only used in the test?

If we only supported exactly one half, we could:

  • Receive output_second_half directly as input (saves a column)
  • Define let output_first_half = 1 - output_second_half (saves another column)
  • Remove 4 links to memory

Copy link
Member Author

@lvella lvella Mar 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried doing it this way, but I couldn't because the index of output[index] must be constant, so all 8 links are needed regardless...

Thinking about it more, I could create a new array[4] and assign the values depending on output_first_half, but then the column balance would be +3.

Would it be worth it?

As for the need of the full state, I know of a theoretical one: the original form of sponge construction (not the one used in plonky3) requires the full state. But yeah, we currently don't implement that, and the only reason I can think of for ever needing it is for compatibility with some existing system.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would work:

let output_first_half = 1 - output_second_half;
let output_0 = output_first_half * output[0] + output_second_half * output[4];
link if is_used ~> mem.mstore(output_addr + 0, output_time_step, output_0);

which would not introduce any extra columns, right?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding whether we'll need the full state, I don't know :)


// TODO: turn this into array operations
link if is_used ~> mem.mstore(output_addr + 0, time_step + 1, output[0]);
link if is_used ~> mem.mstore(output_addr + 4, time_step + 1, output[1]);
link if is_used ~> mem.mstore(output_addr + 8, time_step + 1, output[2]);
link if is_used ~> mem.mstore(output_addr + 12, time_step + 1, output[3]);
link if is_used ~> mem.mstore(output_addr + 16, time_step + 1, output[4]);
link if is_used ~> mem.mstore(output_addr + 20, time_step + 1, output[5]);
link if is_used ~> mem.mstore(output_addr + 24, time_step + 1, output[6]);
link if is_used ~> mem.mstore(output_addr + 28, time_step + 1, output[7]);
// TODO: turn these into array operations:

// Write the first half of the output
let output_addr;
link if is_used * output_first_half ~> mem.mstore(output_addr + 0, output_time_step, output[0]);
link if is_used * output_first_half ~> mem.mstore(output_addr + 4, output_time_step, output[1]);
link if is_used * output_first_half ~> mem.mstore(output_addr + 8, output_time_step, output[2]);
link if is_used * output_first_half ~> mem.mstore(output_addr + 12, output_time_step, output[3]);

// Write the second half of the output
let second_half_addr = output_addr + 16 * output_first_half;
link if is_used * output_second_half ~> mem.mstore(second_half_addr + 0, output_time_step, output[4]);
link if is_used * output_second_half ~> mem.mstore(second_half_addr + 4, output_time_step, output[5]);
link if is_used * output_second_half ~> mem.mstore(second_half_addr + 8, output_time_step, output[6]);
link if is_used * output_second_half ~> mem.mstore(second_half_addr + 12, output_time_step, output[7]);
}
1 change: 0 additions & 1 deletion std/machines/hash/poseidon_bb.asm
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ use std::array;
use std::check::assert;
use std::utils::unchanged_until;
use std::utils::force_bool;
use std::utils::new_bool;
use std::utils::sum;
use std::convert::expr;
use std::machines::small_field::memory::Memory;
Expand Down
Loading
Loading