Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

argon2: Add parallel lane processing #149

Merged
merged 5 commits into from
Apr 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions argon2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ readme = "README.md"
[dependencies]
blake2 = { version = "0.9", default-features = false }
password-hash = { version = "0.1", optional = true }
rayon = { version = "1", optional = true }
zeroize = { version = "1", optional = true }

[dev-dependencies]
Expand All @@ -26,6 +27,7 @@ rand_core = { version = "0.6", features = ["std"] }

[features]
default = ["password-hash", "rand"]
parallel = ["rayon", "std"]
rand = ["password-hash/rand_core"]
std = ["password-hash/std"]

Expand Down
116 changes: 107 additions & 9 deletions argon2/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ use blake2::{
Blake2b, Digest, VarBlake2b,
};

#[cfg(feature = "parallel")]
use {
alloc::vec::Vec,
core::mem,
tarcieri marked this conversation as resolved.
Show resolved Hide resolved
rayon::iter::{ParallelBridge, ParallelIterator},
};

#[cfg(feature = "zeroize")]
use zeroize::Zeroize;

Expand All @@ -29,14 +36,47 @@ struct Position {
index: u32,
}

/// Borrowed view over the slice of memory blocks an instance works on.
struct Memory<'a> {
    /// The borrowed blocks
    data: &'a mut [Block],

    /// Number of blocks in `data`, recorded once at construction
    size: usize,
}

impl<'a> Memory<'a> {
    /// Wrap `data` and remember how many blocks it holds.
    fn new(data: &'a mut [Block]) -> Self {
        Self {
            // Evaluated before `data` is moved into the struct below.
            size: data.len(),
            data,
        }
    }

    /// Return the block at `idx` by value.
    ///
    /// Panics if `idx` is out of bounds.
    fn get_block(&self, idx: usize) -> Block {
        let block = &self.data[idx];
        *block
    }

    /// Return exclusive access to the block at `idx`.
    ///
    /// Panics if `idx` is out of bounds.
    fn get_block_mut(&mut self, idx: usize) -> &mut Block {
        let Self { data, .. } = self;
        &mut data[idx]
    }

    /// Number of blocks in this memory.
    fn len(&self) -> usize {
        self.size
    }
}

/// Argon2 instance: memory pointer, number of passes, amount of memory, type,
/// and derived values.
///
/// Used to evaluate the number and location of blocks to construct in each
/// thread.
pub(crate) struct Instance<'a> {
/// Memory blocks
memory: &'a mut [Block],
memory: Memory<'a>,

/// Version
version: Version,
Expand Down Expand Up @@ -89,6 +129,8 @@ impl<'a> Instance<'a> {
mut initial_hash: digest::Output<Blake2b>,
memory: &'a mut [Block],
) -> Result<Self, Error> {
let memory = Memory::new(memory);

let mut instance = Instance {
version: context.version,
memory,
Expand All @@ -115,10 +157,58 @@ impl<'a> Instance<'a> {
Ok(instance)
}

/// Produce one lifetime-erased address of this instance for every lane.
///
/// Returns a `Vec` holding `self.lanes` copies of the address of `self`,
/// stored as `usize`. No reference is created or dereferenced here; each
/// entry is meant to be cast back to `*mut Instance<'static>` by the caller.
///
/// # Safety
///
/// The returned addresses carry no lifetime information. The caller must make
/// sure they are not dereferenced after the borrow of `self` has ended, and
/// that accesses made through them do not conflict with one another.
#[cfg(feature = "parallel")]
#[allow(unsafe_code)]
unsafe fn mut_self_refs(&mut self) -> Vec<usize> {
let lanes = self.lanes;
// Erase the instance's lifetime parameter by transmuting it to `'static`.
// This transmute can be skipped when a scoped threadpool is used (or when `spawn_unchecked()` gets stabilised)
let this = mem::transmute::<_, &mut Instance<'static>>(self);
// Degrade the reference to a raw pointer, then to a plain integer address.
// NOTE(review): presumably `usize` is used so the values are `Send` (raw pointers are not) — confirm.
let this: *mut Instance<'static> = this;
let this = this as usize;

// Hand out the same address once per lane; turning a copy back into a reference is left to the caller.
core::iter::repeat(this).take(lanes as usize).collect()
}

/// Fill the memory with one parallel task per lane.
///
/// For every pass and every one of the `SYNC_POINTS` slices, the segments of
/// all lanes are filled through rayon's `par_bridge`. `for_each` only returns
/// once every lane of the current slice has been processed, so slices are
/// handled strictly one after another.
#[cfg(feature = "parallel")]
fn fill_memory_blocks_par(&mut self) {
    for r in 0..self.passes {
        for s in 0..SYNC_POINTS {
            // SAFETY:
            // - Every task that receives a reference has finished by the time
            //   `for_each` returns, i.e. before `self` can be dropped.
            // - The read and write operations of the individual lanes
            //   *shouldn't* overlap within a slice.
            // NOTE(review): several `&mut Instance` pointing at the same object are
            // live at once below; even with disjoint block accesses this is at odds
            // with Rust's aliasing rules for `&mut` — consider restructuring.
            #[allow(unsafe_code)]
            let self_refs = unsafe { self.mut_self_refs() };

            (0..self.lanes)
                .zip(self_refs)
                .par_bridge()
                .for_each(|(l, self_ref)| {
                    // SAFETY: `self_ref` is the address of `self` produced by
                    // `mut_self_refs` above, and `self` is still borrowed here.
                    #[allow(unsafe_code)]
                    let self_ref = unsafe { &mut *(self_ref as *mut Instance<'static>) };

                    self_ref.fill_segment(Position {
                        pass: r,
                        lane: l,
                        slice: s,
                        index: 0,
                    });
                });
        }

        // GENKAT note: this is where `internal_kat` would be called
    }
}

/// Function that fills the entire memory t_cost times based on the first two
/// blocks in each lane
fn fill_memory_blocks(&mut self) {
// TODO(tarcieri): multithread support
#[cfg(feature = "parallel")]
if self.threads > 1 {
self.fill_memory_blocks_par();
return;
}

// Single-threaded version for p=1 case
for r in 0..self.passes {
for s in 0..SYNC_POINTS {
Expand All @@ -138,12 +228,12 @@ impl<'a> Instance<'a> {

/// XORing the last block of each lane, hashing it, making the tag.
fn finalize(&mut self, out: &mut [u8]) -> Result<(), Error> {
let mut blockhash = self.memory[(self.lane_length - 1) as usize];
let mut blockhash = self.memory.get_block((self.lane_length - 1) as usize);

// XOR the last blocks
for l in 1..self.lanes {
let last_block_in_lane = l * self.lane_length + (self.lane_length - 1);
blockhash ^= self.memory[last_block_in_lane as usize];
blockhash ^= self.memory.get_block(last_block_in_lane as usize);
}

// Hash the result
Expand Down Expand Up @@ -173,7 +263,9 @@ impl<'a> Instance<'a> {
// G(H0||1||i)
for i in 0u32..2u32 {
blake2b_long(&[blockhash, &i.to_le_bytes(), &l.to_le_bytes()], &mut hash)?;
self.memory[(l * self.lane_length + i) as usize].load(&hash);
self.memory
.get_block_mut((l * self.lane_length + i) as usize)
.load(&hash);
}
}

Expand Down Expand Up @@ -239,7 +331,7 @@ impl<'a> Instance<'a> {
}
address_block[(i % ADDRESSES_IN_BLOCK) as usize]
} else {
self.memory[prev_offset as usize][0]
self.memory.get_block(prev_offset as usize)[0]
};

// 1.2.2 Computing the lane of the reference block
Expand All @@ -260,12 +352,18 @@ impl<'a> Instance<'a> {
);

// 2 Creating a new block
let ref_block = self.memory[(self.lane_length * ref_lane + ref_index) as usize];
let prev_block = self.memory[prev_offset as usize];
let ref_block = self
.memory
.get_block((self.lane_length * ref_lane + ref_index) as usize);
let prev_block = self.memory.get_block(prev_offset as usize);

// version 1.2.1 and earlier: overwrite, not XOR
let without_xor = self.version == Version::V0x10 || position.pass == 0;
self.memory[curr_offset as usize].fill_block(prev_block, ref_block, !without_xor);
self.memory.get_block_mut(curr_offset as usize).fill_block(
prev_block,
ref_block,
!without_xor,
);

curr_offset += 1;
prev_offset += 1;
Expand Down
2 changes: 1 addition & 1 deletion argon2/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
html_logo_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo.svg",
html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/meta/master/logo.svg"
)]
#![forbid(unsafe_code)]
#![deny(unsafe_code)]
#![warn(rust_2018_idioms, missing_docs)]

#[macro_use]
Expand Down