Skip to content
This repository was archived by the owner on Nov 6, 2020. It is now read-only.

optimization of U256 #515

Merged
merged 24 commits into from
Feb 26, 2016
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions util/benches/bigint.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Copyright 2015, 2016 Ethcore (UK) Ltd.
// This file is part of Parity.

// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.

//! benchmarking for bigint (U256 / U128 overflowing arithmetic)
//! should be started with:
//! ```bash
//! multirust run nightly cargo bench
//! ```

#![feature(test)]
#![feature(asm)]

extern crate test;
extern crate ethcore_util;

use test::{Bencher, black_box};
use ethcore_util::uint::*;

#[bench]
fn u256_add(b: &mut Bencher) {
b.iter(|| {
let n = black_box(10000);
(0..n).fold(U256::from(1234599u64), |old, new| { old.overflowing_add(U256::from(new)).0 })
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

benchmark is not very accurate having a call to U256::from on each iteration.

});
}


/// Benchmarks `U256::overflowing_sub`: starts from `u64::MAX` and subtracts every
/// counter value in `0..10000`, one after another, from the running result.
#[bench]
fn u256_sub(b: &mut Bencher) {
    b.iter(|| {
        let rounds = black_box(10000);
        let seed = U256::from(::std::u64::MAX);
        (0..rounds).fold(seed, |acc, step| acc.overflowing_sub(U256::from(step)).0)
    });
}

/// Benchmarks `U256::overflowing_mul` by repeatedly multiplying a running product by
/// the loop counter for 10000 rounds.
///
/// NOTE(review): the counter starts at 0, so presumably the running product is zero
/// from the first round on — confirm this is the intended workload.
#[bench]
fn u256_mul(b: &mut Bencher) {
    b.iter(|| {
        let rounds = black_box(10000);
        let seed = U256([12345u64, 0u64, 0u64, 0u64]);
        (0..rounds).fold(seed, |acc, step| acc.overflowing_mul(U256::from(step)).0)
    });
}


/// Benchmarks `U128::overflowing_mul` by repeatedly multiplying a running product by
/// the loop counter for 10000 rounds.
///
/// NOTE(review): the counter starts at 0, so presumably the running product is zero
/// from the first round on — confirm this is the intended workload.
#[bench]
fn u128_mul(b: &mut Bencher) {
    b.iter(|| {
        let rounds = black_box(10000);
        let seed = U128([12345u64, 0u64]);
        (0..rounds).fold(seed, |acc, step| acc.overflowing_mul(U128::from(step)).0)
    });
}

1 change: 1 addition & 0 deletions util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#![warn(missing_docs)]
#![cfg_attr(feature="dev", feature(plugin))]
#![cfg_attr(feature="dev", feature(asm))]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The dev feature is for dev dependencies like clippy. Something else should be used here

#![cfg_attr(feature="dev", plugin(clippy))]

// Clippy settings
Expand Down
272 changes: 233 additions & 39 deletions util/src/uint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,232 @@ macro_rules! impl_map_from {
}
}

// Portable dispatch for `overflowing_add`: compiled whenever the `dev` feature is off
// or the target is not x86_64. It forwards every argument unchanged to
// `uint_overflowing_add_reg!`.
#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
macro_rules! uint_overflowing_add {
    ($name:ident, $n_words:expr, $lhs:expr, $rhs:expr) => (
        uint_overflowing_add_reg!($name, $n_words, $lhs, $rhs)
    )
}

// Portable multi-word addition with overflow detection.
//
// Arguments:
//   $name      - tuple-struct type wrapping `[u64; $n_words]` (least significant word first)
//   $n_words   - number of 64-bit words in the type
//   $self_expr - left operand
//   $other     - right operand
//
// Evaluates to `($name, bool)`: the sum wrapped to `$n_words * 64` bits and a flag that is
// `true` when a carry left the most significant word.
//
// The carry is rippled through the words in a single pass. This yields the same result as
// adding a separate carry vector in a second (recursive) addition, without re-entering
// `overflowing_add`.
macro_rules! uint_overflowing_add_reg {
    ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
        let $name(ref me) = $self_expr;
        let $name(ref you) = $other;
        let mut ret = [0u64; $n_words];
        let mut carry = false;

        for (slot, (&a, &b)) in ret.iter_mut().zip(me.iter().zip(you.iter())) {
            let partial = a.wrapping_add(b);
            let total = partial.wrapping_add(carry as u64);
            *slot = total;
            // A wrapped addition always produces a value smaller than its left operand.
            // At most one of the two additions can wrap for a given word.
            carry = partial < a || total < partial;
        }

        ($name(ret), carry)
    })
}


// x86_64 inline-assembly variant of `overflowing_add`, compiled only when the `dev`
// feature is enabled on an x86_64 target.
//
// The `U256` arm adds the four 64-bit words with one `add` followed by three `adc`
// instructions and reads the final carry flag with `setc`. It evaluates to
// `(U256, bool)` where the flag is `true` on overflow. Every other type falls through to
// `uint_overflowing_add_reg!`.
//
// Notes on the operand layout:
//   * The lowest word uses `add`, not `adc`: the carry flag is unspecified on entry to
//     the asm block and must not be folded into the sum.
//   * Result registers are chosen by the compiler (`=r`) and tied to the words of
//     `self` ("0".."3"), so no fixed registers are reserved.
//   * The words of `other` use "mr", so they may stay in registers when the compiler
//     already holds them there (e.g. after inlining) instead of being forced to memory.
#[cfg(all(feature="dev", target_arch = "x86_64"))]
macro_rules! uint_overflowing_add {
    (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
        // Zero-initialised rather than `mem::uninitialized()`; every word is overwritten
        // by the asm outputs below.
        let mut result = [0u64; 4];
        let U256(ref self_t) = $self_expr;
        let U256(ref other_t) = $other;

        let overflow: u8;
        // SAFETY: the asm only reads its declared inputs and writes its declared outputs
        // (four result registers and `al`); it touches no other memory.
        unsafe {
            asm!("
                add $9, $0
                adc $10, $1
                adc $11, $2
                adc $12, $3
                setc %al
                "
            : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
            : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]),
              "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
            :
            :
            );
        }
        (U256(result), overflow != 0)
    });
    ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
        uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
    )
}

// Portable `overflowing_sub`, compiled whenever the `dev` feature is off or the target is
// not x86_64.
//
// Subtraction is expressed as addition of the two's complement: `!other + 1` is added to
// `self`, and the overflow flags of both additions are discarded. The reported flag is
// simply `self < other`.
#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
macro_rules! uint_overflowing_sub {
    ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
        let underflow = $self_expr < $other;
        let negated = overflowing!((!$other).overflowing_add(From::from(1u64)));
        let difference = overflowing!($self_expr.overflowing_add(negated));
        (difference, underflow)
    })
}

// x86_64 inline-assembly variant of `overflowing_sub`, compiled only when the `dev`
// feature is enabled on an x86_64 target.
//
// The `U256` arm subtracts the four 64-bit words with one `sub` followed by three `sbb`
// instructions and reads the final borrow with `setb`. It evaluates to `(U256, bool)`
// where the flag is `true` when the subtraction borrowed out of the top word. Every other
// type uses the two's-complement formulation built on `overflowing_add`.
//
// Notes on the operand layout:
//   * The template refers to the result registers as `$0`..`$3`. They are allocated by the
//     compiler (`=r`) and tied to the words of `self` ("0".."3"), so the instructions must
//     not name fixed registers such as r8–r11.
//   * The lowest word uses `sub`, not `sbb`: the carry flag is unspecified on entry to the
//     asm block and must not be treated as an incoming borrow.
#[cfg(all(feature="dev", target_arch = "x86_64"))]
macro_rules! uint_overflowing_sub {
    (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
        // Zero-initialised rather than `mem::uninitialized()`; every word is overwritten
        // by the asm outputs below.
        let mut result = [0u64; 4];
        let U256(ref self_t) = $self_expr;
        let U256(ref other_t) = $other;

        let overflow: u8;
        // SAFETY: the asm only reads its declared inputs and writes its declared outputs
        // (four result registers and `al`); it touches no other memory.
        unsafe {
            asm!("
                sub $9, $0
                sbb $10, $1
                sbb $11, $2
                sbb $12, $3
                setb %al
                "
            : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
            : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]),
              "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
            :
            :
            );
        }
        (U256(result), overflow != 0)
    });
    ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
        let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
        let res = overflowing!($self_expr.overflowing_add(res));
        (res, $self_expr < $other)
    })
}

// x86_64 inline-assembly variant of `overflowing_mul`, compiled only when the `dev`
// feature is enabled on an x86_64 target.
//
// The `U256` arm views both operands as four 64-bit words, combines them with a sequence
// of `mulq` / `adc` instructions into r8..r11 (the result words) and rcx (accumulated
// high parts), and evaluates to `(U256(result), overflow > 0)`. Any other type falls
// through to `uint_overflowing_mul_reg!`.
//
// Operand numbering used by the template: $0..$3 = result words, $4 = overflow,
// $5..$8 = words of `self`, $9..$12 = words of `other`.
//
// NOTE(review): `overflow` is declared `u8` but is bound to the full `rcx` register, so
// presumably only the low byte of the accumulated value reaches the `> 0` test — confirm
// whether it should be `u64`.
// NOTE(review): right after the first `mulq $9`, `adc $6, %rdx` adds a word of `self` to
// the high half of the product, and that sum is then multiplied by `$9` again. This does
// not look like a schoolbook sum of partial products — verify the results against
// `uint_overflowing_mul_reg!` before relying on this path.
// NOTE(review): the first `adc` after each `mulq` consumes whatever carry flag `mulq`
// left behind, and `pushf`/`popf` push onto the stack from inside inline asm — confirm
// both are intended and safe on targets that use a stack red zone.
#[cfg(all(feature="dev", target_arch = "x86_64"))]
macro_rules! uint_overflowing_mul {
(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
// Result words are written exclusively by the asm outputs below.
let mut result: [u64; 4] = unsafe { mem::uninitialized() };
// Reinterpret both operands as arrays of four u64 words.
let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };

let overflow: u8;
unsafe {
asm!("
mov $5, %rax
mulq $9
mov %rax, %r8
adc $6, %rdx
pushf

mov %rdx, %rax
mulq $9
popf
adc $$0, %rax
adc $7, %rdx
pushf
mov %rax, %r9


mov %rdx, %rax
mulq $9
popf
adc $$0, %rax
adc $8, %rdx
pushf
mov %rax, %r10

mov %rdx, %rax
mulq $9
popf
adc $$0, %rax
mov %rax, %r11
mov %rdx, %rcx

mov $5, %rax
mulq $10
adc %rax, %r9
adc $6, %rdx
pushf

mov %rdx, %rax
mulq $10
popf
adc %rax, %r10
adc $7, %rdx
pushf

mov %rdx, %rax
mulq $10
popf
adc %rax, %r11
pushf
or %rax, %rcx

mov $5, %rax
mulq $11
popf
adc %rax, %r10
adc $6, %rdx
pushf

mov %rdx, %rax
mulq $11
popf
adc %rax, %r11
pushf
or %rdx, %rcx

mov $5, %rax
mulq $12
popf
adc %rax, %r11
or %rdx, %rcx
"
: /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]),
/* $3 */ "={r11}"(result[3]), /* $4 */ "={rcx}"(overflow)

: /* $5 */ "m"(self_t[0]), /* $6 */ "m"(self_t[1]), /* $7 */ "m"(self_t[2]),
/* $8 */ "m"(self_t[3]), /* $9 */ "m"(other_t[0]), /* $10 */ "m"(other_t[1]),
/* $11 */ "m"(other_t[2]), /* $12 */ "m"(other_t[3])
: "rax", "rdx"
:

);
}
// Any non-zero value accumulated in rcx is reported as overflow.
(U256(result), overflow > 0)
});
// All other integer widths use the portable implementation.
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
)
}

// Portable dispatch for `overflowing_mul`: compiled whenever the `dev` feature is off or
// the target is not x86_64. It forwards every argument unchanged to
// `uint_overflowing_mul_reg!`.
#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
macro_rules! uint_overflowing_mul {
    ($name:ident, $n_words: expr, $lhs: expr, $rhs: expr) => (
        uint_overflowing_mul_reg!($name, $n_words, $lhs, $rhs)
    )
}

// Portable multi-word multiplication with overflow detection.
//
// `$other` is consumed 32 bits at a time (`2 * $n_words` digits in total). For each digit
// the macro multiplies `$self_expr` by that digit, shifts the partial product left to the
// digit's position, and adds it to the running total. The overflow flags of all three
// steps are OR-ed into the flag that is returned alongside the product.
macro_rules! uint_overflowing_mul_reg {
    ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
        let mut product = $name::from(0u64);
        let mut overflow = false;
        // TODO: be more efficient about this
        for limb in 0..(2 * $n_words) {
            // Bit offset of the current 32-bit digit of `$other`.
            let shift = 32 * limb;
            let digit = ($other >> shift).low_u32();
            let partial = overflowing!($self_expr.overflowing_mul_u32(digit), overflow);
            let shifted = overflowing!(partial.overflowing_shl(shift as u32), overflow);
            product = overflowing!(product.overflowing_add(shifted), overflow);
        }
        (product, overflow)
    })
}

macro_rules! overflowing {
($op: expr, $overflow: expr) => (
{
Expand Down Expand Up @@ -297,50 +523,20 @@ macro_rules! construct_uint {
(res, overflow)
}

/// Adds `other` to `self`, returning the wrapped sum together with a flag that is
/// `true` when the addition overflowed.
///
/// The work is delegated to `uint_overflowing_add!`, which selects the implementation
/// for this type and target.
#[inline(always)]
fn overflowing_add(self, other: $name) -> ($name, bool) {
    uint_overflowing_add!($name, $n_words, self, other)
}

/// Subtracts `other` from `self`, returning the wrapped difference together with a
/// flag that is `true` when `self` was smaller than `other`.
///
/// The work is delegated to `uint_overflowing_sub!`, which selects the implementation
/// for this type and target.
#[inline(always)]
fn overflowing_sub(self, other: $name) -> ($name, bool) {
    uint_overflowing_sub!($name, $n_words, self, other)
}

/// Multiplies `self` by `other`, returning the wrapped product together with a flag
/// that is `true` when the multiplication overflowed.
///
/// The work is delegated to `uint_overflowing_mul!`, which selects the implementation
/// for this type and target.
#[inline(always)]
fn overflowing_mul(self, other: $name) -> ($name, bool) {
    uint_overflowing_mul!($name, $n_words, self, other)
}

fn overflowing_div(self, other: $name) -> ($name, bool) {
Expand Down Expand Up @@ -1171,8 +1367,6 @@ mod tests {
);
}



#[test]
#[should_panic]
pub fn uint256_mul_overflow_panic() {
Expand Down