Skip to content

Commit

Permalink
variety of comments/nits
Browse files Browse the repository at this point in the history
  • Loading branch information
eschorn1 committed Oct 29, 2024
1 parent 14d5b6d commit 479b744
Show file tree
Hide file tree
Showing 13 changed files with 206 additions and 182 deletions.
1 change: 0 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ jobs:
- uses: EmbarkStudios/cargo-deny-action@v1


# TODO: Temp 'fix' for Rust 1.80/1.81 problem involving 'time'; to be unwound...
cargo_outdated:
runs-on: ubuntu-latest
steps:
Expand Down
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 0.4.4 (2024-10-XX)
## 0.4.4 (2024-10-29)

- Significant shrink of required stack size
- Internal-only refactoring and polishing
- Internal-only refactoring, clean-up and polishing

## 0.4.3 (2024-10-16)

Expand Down
4 changes: 3 additions & 1 deletion benches/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ Note that constant-time restrictions on the implementation do impact performance

Additional performance optimizations are on the roadmap. Near-obvious uplift can be
had with more careful modular multiplication & addition using fewer reductions. Also,
'u16' arithmetic has an x86 performance penalty.
'u16' arithmetic has an x86 performance penalty. The `cap_a_hat` pre-compute can be
put into both PublicKey and PrivateKey structs, but this currently causes stack overflows
on Windows with unoptimized dev builds; this will be investigated further.

~~~
October 15, 2024
Expand Down
2 changes: 1 addition & 1 deletion dudect/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ impl CryptoRng for TestRng {}
#[repr(align(8))]
pub struct AlignedBytes<const BYTE_LEN: usize>(pub(crate) [u8; BYTE_LEN]);


#[allow(deprecated)] // calling dudect fn below in inner loop
fn keygen_and_sign(runner: &mut CtRunner, mut _rng: &mut BenchRng) {
const ITERATIONS_INNER: usize = 5;
const ITERATIONS_OUTER: usize = 2_usize.pow(20); // 2**20 = 1_048_576
Expand Down
24 changes: 12 additions & 12 deletions src/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ use crate::{D, Q};
///
/// This is only used in `ml_dsa::key_gen()` and does not involve untrusted input.
///
/// **Input**: `ρ ∈ {0,1}^256`, `t1 ∈ R^k` with coefficients in `[0, 2^{bitlen(q−1)−d}-1]`. <br>
/// **Input**: `ρ ∈ B^{32}`, `t1 ∈ R^k` with coefficients in `[0, 2^{bitlen(q−1)−d}-1]`. <br>
/// **Output**: Public key `pk ∈ B^{32+32·k·(bitlen(q−1)−d)}`.
pub(crate) fn pk_encode<const K: usize, const PK_LEN: usize>(
rho: &[u8; 32], t1: &[R; K],
) -> [u8; PK_LEN] {
let blqd = bit_length(Q - 1) - D as usize;
debug_assert!(t1.iter().all(|t| is_in_range(t, 0, (1 << blqd) - 1)), "Alg 22: t1 out of range");
debug_assert_eq!(PK_LEN, 32 + 32 * K * blqd, "Alg 22: bad pk/config size");
const BLQD: usize = bit_length(Q - 1) - D as usize;
debug_assert!(t1.iter().all(|t| is_in_range(t, 0, (1 << BLQD) - 1)), "Alg 22: t1 out of range");
debug_assert_eq!(PK_LEN, 32 + 32 * K * BLQD, "Alg 22: bad pk/config size");
let mut pk = [0u8; PK_LEN];

// 1: pk ← rho
Expand All @@ -30,10 +30,10 @@ pub(crate) fn pk_encode<const K: usize, const PK_LEN: usize>(
// 3: pk ← pk || SimpleBitPack(t1[i], 2^{bitlen(q−1)−d}-1)
// 4: end for
pk[32..]
.chunks_mut(32 * blqd)
.chunks_mut(32 * BLQD)
.enumerate()
.take(K) // not strictly needed
.for_each(|(i, chunk)| simple_bit_pack(&t1[i], (1 << blqd) - 1, chunk));
.for_each(|(i, chunk)| simple_bit_pack(&t1[i], (1 << BLQD) - 1, chunk));

// 5: return pk
pk
Expand All @@ -47,7 +47,7 @@ pub(crate) fn pk_encode<const K: usize, const PK_LEN: usize>(
/// `simple_bit_unpack()` will detect malformed input -- an overly conservative (?) route for now.
///
/// **Input**: Public key `pk ∈ B^{32+32·k·(bitlen(q−1)−d)}`. <br>
/// **Output**: `ρ ∈ {0,1}^256`, `t1 ∈ R^k` with coefficients in `[0, 2^{bitlen(q−1)−d}−1]`).
/// **Output**: `ρ ∈ B^{32}`, `t1 ∈ R^k` with coefficients in `[0, 2^{bitlen(q−1)−d}−1]`.
///
/// # Errors
/// Returns an error when the internal `simple_bit_unpack()` invocation finds an element of
Expand Down Expand Up @@ -85,7 +85,7 @@ pub(crate) fn pk_decode<const K: usize, const PK_LEN: usize>(
///
/// This is only used in `ml_dsa::key_gen()` and does not involve untrusted input.
///
/// **Input**: `ρ ∈ {0,1}^256`, `K ∈ {0,1}^256`, `tr ∈ {0,1}^512`,
/// **Input**: `ρ ∈ B^{32}`, `K ∈ B^{32}`, `tr ∈ B^{64}`,
/// `s_1 ∈ R^l` with coefficients in `[−η, η]`,
/// `s_2 ∈ R^k` with coefficients in `[−η, η]`,
/// `t_0 ∈ R^k` with coefficients in `[−2^{d-1}+1, 2^{d-1}]`.
Expand Down Expand Up @@ -159,7 +159,7 @@ pub(crate) fn sk_encode<const K: usize, const L: usize, const SK_LEN: usize>(
///
/// **Input**: Private key, `sk ∈ B^{32+32+64+32·((ℓ+k)·bitlen(2η)+d·k)}`
/// Security parameter `η` (eta) must be either 2 or 4.<br>
/// **Output**: `ρ ∈ {0,1}^256`, `K ∈ {0,1}^256`, `tr ∈ {0,1}^512`,
/// **Output**: `ρ ∈ B^{32}`, `K ∈ B^{32}`, `tr ∈ B^{64}`,
/// `s_1 ∈ R^ℓ`, `s_2 ∈ R^k`, `t_0 ∈ R^k` with coefficients in `[−2^{d−1}+1, 2^{d−1}]`.
///
/// # Errors
Expand All @@ -168,13 +168,13 @@ pub(crate) fn sk_encode<const K: usize, const L: usize, const SK_LEN: usize>(
pub(crate) fn sk_decode<const K: usize, const L: usize, const SK_LEN: usize>(
eta: i32, sk: &[u8; SK_LEN],
) -> Result<(&[u8; 32], &[u8; 32], &[u8; 64], [R; L], [R; K], [R; K]), &'static str> {
const TOP: i32 = 1 << (D - 1);
debug_assert!((eta == 2) || (eta == 4), "Alg 25: incorrect eta");
debug_assert_eq!(
SK_LEN,
128 + 32 * ((K + L) * bit_length(2 * eta) + D as usize * K),
"Alg 25: bad sk/config size"
);
let top = 1 << (D - 1);
let (mut s_1, mut s_2, mut t_0) = ([R0; L], [R0; K], [R0; K]);

// 1: (rho, 𝐾, tr, 𝑦0 , … , 𝑦ℓ−1 , 𝑧0 , … , 𝑧𝑘−1 , 𝑤0 , … , 𝑤𝑘−1 ) ∈
Expand Down Expand Up @@ -211,7 +211,7 @@ pub(crate) fn sk_decode<const K: usize, const L: usize, const SK_LEN: usize>(
for i in 0..K {
//
// 9: t0[i] ← BitUnpack(wi, 2^{d−1} - 1, 2^{d−1}) ▷ This is always in the correct range
t_0[i] = bit_unpack(&sk[start + i * step..start + (i + 1) * step], top - 1, top)?;
t_0[i] = bit_unpack(&sk[start + i * step..start + (i + 1) * step], TOP - 1, TOP)?;

// 10: end for
}
Expand All @@ -231,7 +231,7 @@ pub(crate) fn sk_decode<const K: usize, const L: usize, const SK_LEN: usize>(
/// The `CTEST` generic is only passed through to the `hint_bit_pack()` leaf function
/// such that this logic becomes constant-time.
///
/// **Input**: `c_tilde ∈ {0,1}^2λ` (bits),
/// **Input**: `c_tilde ∈ B^{λ/4}`,
/// `z ∈ R^ℓ` with coefficients in `[−1*γ_1 + 1, γ_1]`,
/// `h ∈ R^k_2`. <br>
/// **Output**: Signature, `σ ∈ B^{λ/4+l·32·(1+bitlen(γ_1-1))+ω+k}`
Expand Down
25 changes: 12 additions & 13 deletions src/hashing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use sha2::{Digest, Sha256, Sha512};
use sha3::digest::{ExtendableOutput, Update, XofReader};
use sha3::{Shake128, Shake256};


/// # Function H(v,d) of section 3.7 item 1 at bottom of page 14.
/// Takes a reference to a list of byte-slice references and runs them through Shake256.
/// Returns a xof reader for extracting extendable output.
Expand Down Expand Up @@ -86,7 +85,7 @@ pub(crate) fn sample_in_ball<const CTEST: bool>(tau: i32, rho: &[u8]) -> R {
// 13: end for
}

// slightly redundant...
// slightly redundant, but fuzz target
debug_assert!(
c.0.iter().map(|&e| usize::from(e != 0)).sum::<usize>() == tau,
"Alg 29: bad hamming weight (a)"
Expand Down Expand Up @@ -124,10 +123,10 @@ pub(crate) fn rej_ntt_poly<const CTEST: bool>(rhos: &[&[u8]]) -> T {
while j < 256 {
//
// 5: (ctx, 𝑠) ← G.Squeeze(ctx, 3)
// 6: a_hat[j] ← CoefFromThreeBytes(H128(ρ)[[c]], H128(ρ)[[c + 1]], H128(ρ)[[c + 2]])
let mut h128pc = [0u8; 3];
xof.read(&mut h128pc); // implicit c += 3
let a_hat_j = coeff_from_three_bytes::<CTEST>(h128pc); // gets a result
// 6: 𝑎[𝑗] ← CoeffFromThreeBytes(𝑠[0], 𝑠[1], 𝑠[2])
let mut h5 = [0u8; 3];
xof.read(&mut h5); // implicit c += 3
let a_hat_j = coeff_from_three_bytes::<CTEST>(h5); // gets a result

// 7: if a_hat[j] != ⊥ then
if let Ok(res) = a_hat_j {
Expand All @@ -154,7 +153,7 @@ pub(crate) fn rej_ntt_poly<const CTEST: bool>(rhos: &[&[u8]]) -> T {
/// The `CTEST` generic is only passed through to the `coef_from_half_byte()` leaf function such
/// that this logic becomes constant-time.
///
/// **Input**: A seed `ρ ∈B^66`. <br>
/// **Input**: A seed `ρ ∈ B^{66}`. <br>
/// **Output**: A polynomial `a ∈ Rq`.
pub(crate) fn rej_bounded_poly<const CTEST: bool>(eta: i32, rhos: &[&[u8]]) -> R {
debug_assert_eq!(rhos.iter().map(|&i| i.len()).sum::<usize>(), 528 / 8, "Alg 31: bad rho size");
Expand Down Expand Up @@ -221,7 +220,7 @@ pub(crate) fn rej_bounded_poly<const CTEST: bool>(eta: i32, rhos: &[&[u8]]) -> R
/// such that this logic becomes constant-time.
///
/// **Input**: `ρ ∈ B^{32}`. <br>
/// **Output**: Matrix `cap_a_hat ∈ (𝑇𝑞)^{𝑘×ℓ}`
/// **Output**: Matrix `cap_a_hat ∈ 𝑇_𝑞^{𝑘×ℓ}`
#[allow(clippy::cast_possible_truncation)] // s and r as u8
pub(crate) fn expand_a<const CTEST: bool, const K: usize, const L: usize>(
rho: &[u8; 32],
Expand All @@ -247,7 +246,7 @@ pub(crate) fn expand_a<const CTEST: bool, const K: usize, const L: usize>(
/// The `CTEST` generic is only passed through to the `rej_bounded_poly()` leaf function
/// such that this logic becomes constant-time.
///
/// **Input**: `ρ ∈ B^64` <br>
/// **Input**: `ρ ∈ B^{64}` <br>
/// **Output**: Vectors `s1`, `s2` of polynomials in `R_q`.
#[allow(clippy::cast_possible_truncation)] // r and r+L
pub(crate) fn expand_s<const CTEST: bool, const K: usize, const L: usize>(
Expand All @@ -267,8 +266,8 @@ pub(crate) fn expand_s<const CTEST: bool, const K: usize, const L: usize>(
core::array::from_fn(|r| rej_bounded_poly::<CTEST>(eta, &[rho, &[(r + L) as u8], &[0]]));

// 7: return (s_1 , s_2)
debug_assert!(s1.iter().all(|r| is_in_range(r, eta, eta)), "Alg 27: s1 out of range");
debug_assert!(s2.iter().all(|r| is_in_range(r, eta, eta)), "Alg 27: s2 out of range");
debug_assert!(s1.iter().all(|r| is_in_range(r, eta, eta)), "Alg 33: s1 out of range");
debug_assert!(s2.iter().all(|r| is_in_range(r, eta, eta)), "Alg 33: s2 out of range");
(s1, s2)
}

Expand All @@ -277,8 +276,8 @@ pub(crate) fn expand_s<const CTEST: bool, const K: usize, const L: usize>(
/// Samples a vector `s ∈ R^ℓ_q` such that each polynomial `s_j` has coefficients
/// between `−γ_1 + 1` and `γ_1`. This function is not exposed to untrusted input.
///
/// **Input**: A bit string `ρ ∈ {0,1}^512` and a non-negative integer `µ`. <br>
/// **Output**: Vector `y ∈ R^ℓ_q`.
/// **Input**: A byte string `ρ ∈ B^{64}` and a non-negative integer `µ`. <br>
/// **Output**: Vector `y ∈ R^ℓ`.
pub(crate) fn expand_mask<const L: usize>(gamma1: i32, rho: &[u8; 64], mu: u16) -> [R; L] {
let mut y = [R0; L];
let mut v = [0u8; 32 * 20]; // leaving a few bytes on the table
Expand Down
15 changes: 7 additions & 8 deletions src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{Q, ZETA};
// Some arith routines leverage dilithium https://github.com/PQClean/PQClean/tree/master/crypto_sign


/// Algorithm 43 `BitRev8()` is not implemented; zetas are pulled from pre-computed table
/// # Algorithm 43 `BitRev8()` is not implemented; zetas are pulled from pre-computed table
/// `ZETA_TABLE_MONT`; see below (near end)
/// # Macro ensure!()
Expand Down Expand Up @@ -44,8 +44,7 @@ pub(crate) const fn partial_reduce64(a: i64) -> i32 {
}


// TODO: need to experiment a little with `mul_red(32, 32)`
#[allow(dead_code)]
#[allow(dead_code)] // I may come back to this and experiment more
#[allow(clippy::cast_possible_truncation)]
pub(crate) const fn partial_reduce64b(a: i64) -> i32 {
const MM: i64 = ((1 << 64) / (Q as i128)) as i64;
Expand Down Expand Up @@ -86,9 +85,9 @@ pub(crate) const fn bit_length(x: i32) -> usize { x.ilog2() as usize + 1 }


/// Mod +/- see definition on page 6.
/// If α is a positive integer and m ∈ Z or m ∈ `Z_α` , then m mod± α denotes the unique
/// element m′ ∈ Z in the range −α/2 < m′ ≤ α/2 such that m and m′ are congruent
/// modulo α. 'ready to optimize'
/// If `α` is a positive integer and `m ∈ Z` or `m ∈ Z_α` , then m mod± α denotes the unique
/// element `m′ ∈ Z` in the range `−α/2 < m′ ≤ α/2` such that `m` and `m′` are congruent
/// modulo `α`. 'ready to optimize'
pub(crate) fn center_mod(m: i32) -> i32 {
debug_assert!(m.abs() < 2_143_289_344, "center_mod input"); // for clarity; caught in full_reduce32
let t = full_reduce32(m);
Expand Down Expand Up @@ -120,7 +119,7 @@ pub(crate) fn mat_vec_mul<const K: usize, const L: usize>(

// Note Algorithm 44 has been dissolved into its place of use(s)

/// Algorithm 46: `AddVectorNTT(v_hat, w_hat)` on page 45.
/// # Algorithm 46: `AddVectorNTT(v_hat, w_hat)` on page 45.
/// Computes the sum `v_hat + w_hat` of two vectors `v_hat`, `w_hat` over `𝑇_𝑞`.
///
/// **Input**: `ℓ ∈ ℕ, v_hat ∈ 𝑇_𝑞^ℓ , w_hat ∈ 𝑇_𝑞^ℓ`. <br>
Expand Down Expand Up @@ -151,7 +150,7 @@ pub(crate) fn infinity_norm<const ROW: usize>(w: &[R; ROW]) -> i32 {
}


/// Algorithm 49: MontgomeryReduce(𝑎) on page 50.
/// # Algorithm 49: MontgomeryReduce(𝑎) on page 50.
/// Computes `𝑎 ⋅ 2^{−32} mod 𝑞`.
///
/// **Input**: Integer `𝑎` with `−2^{31}·𝑞 ≤ 𝑎 ≤ 2^{31}·𝑞`.
Expand Down
1 change: 1 addition & 0 deletions src/high_low.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ pub(crate) fn make_hint(gamma2: i32, z: Zq, r: Zq) -> bool {
pub(crate) fn use_hint(gamma2: i32, h: Zq, r: Zq) -> Zq {
//
// 1: m ← (q− 1)/(2*γ_2)
// dissolved into steps 3 and 4 below

// 2: (r1, r0) ← Decompose(r)
let (r1, r0) = decompose(gamma2, r);
Expand Down
Loading

0 comments on commit 479b744

Please sign in to comment.