Skip to content

Commit

Permalink
update
Browse files: browse the repository at this point in the history
  • Loading branch information
newpavlov committed Jun 21, 2024
1 parent 52c887a commit f04d526
Showing 1 changed file with 10 additions and 15 deletions.
25 changes: 10 additions & 15 deletions sha1/src/compress/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,18 @@ unsafe fn compress_sha1_neon(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
let mut abcd = vld1q_u32(state.as_ptr());
let mut e0 = state[4];
let mut e1;
let (mut msg0, mut msg1, mut msg2, mut msg3);

for block in blocks {
let abcd_cpy = abcd;
let e0_cpy = e0;

let block_ptr: *const u32 = block.as_ptr().cast();
let mut msg0 = vld1q_u32(block_ptr);
let mut msg1 = vld1q_u32(block_ptr.add(4));
let mut msg2 = vld1q_u32(block_ptr.add(8));
let mut msg3 = vld1q_u32(block_ptr.add(12));

// Reverse byte order
msg0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg0)));
msg1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg1)));
msg2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg2)));
msg3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg3)));
// Load and reverse byte order
let bp = block.as_ptr();
msg0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(bp)));
msg1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(bp.add(16))));
msg2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(bp.add(32))));
msg3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(bp.add(48))));

let mut tmp0 = vaddq_u32(msg0, vdupq_n_u32(K[0]));
let mut tmp1 = vaddq_u32(msg1, vdupq_n_u32(K[0]));
Expand Down Expand Up @@ -151,14 +148,11 @@ unsafe fn compress_sha1_neon(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
abcd = vsha1pq_u32(abcd, e0, tmp0);
tmp0 = vaddq_u32(msg2, vdupq_n_u32(K[3]));
msg3 = vsha1su1q_u32(msg3, msg2);
msg0 = vsha1su0q_u32(msg0, msg1, msg2);

// Rounds 68-71
e0 = vsha1h_u32(vgetq_lane_u32(abcd, 0));
abcd = vsha1pq_u32(abcd, e1, tmp1);
tmp1 = vaddq_u32(msg3, vdupq_n_u32(K[3]));
msg0 = vsha1su1q_u32(msg0, msg3);
let _ = msg0;

// Rounds 72-75
e1 = vsha1h_u32(vgetq_lane_u32(abcd, 0));
Expand All @@ -168,8 +162,9 @@ unsafe fn compress_sha1_neon(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
e0 = vsha1h_u32(vgetq_lane_u32(abcd, 0));
abcd = vsha1pq_u32(abcd, e1, tmp1);

e0 = e0.wrapping_add(e0_cpy);
// Update state
abcd = vaddq_u32(abcd_cpy, abcd);
e0 = e0.wrapping_add(e0_cpy);
}

// Save state
Expand Down

0 comments on commit f04d526

Please sign in to comment.