openethereum · gavofyork · Feb 26, 2016 · Feb 24, 2016 · Feb 24, 2016 · Feb 24, 2016
diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
@@ -0,0 +1,65 @@
+// Copyright 2015, 2016 Ethcore (UK) Ltd.
+// This file is part of Parity.
+
+// Parity is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Parity is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Parity.  If not, see <http://www.gnu.org/licenses/>.
+
+//! benchmarking for bigint (`U256` / `U128`) arithmetic
+//! should be started with:
+//! ```bash
+//! multirust run nightly cargo bench
+//! ```
+
+#![feature(test)]
+#![feature(asm)]
+
+extern crate test;
+extern crate ethcore_util;
+
+use test::{Bencher, black_box};
+use ethcore_util::uint::*;
+
+/// Benchmarks a chain of `U256::overflowing_add` calls: every value of
+/// `0..10000` is added onto a running total seeded with 1234599.
+#[bench]
+fn u256_add(b: &mut Bencher) {
+	b.iter(|| {
+		// `black_box` hides the trip count from the optimizer.
+		let iterations = black_box(10000);
+		let seed = U256::from(1234599u64);
+		(0..iterations).fold(seed, |total, step| total.overflowing_add(U256::from(step)).0)
+	});
+}
+
+
+/// Benchmarks a chain of `U256::overflowing_sub` calls: every value of
+/// `0..10000` is subtracted from a running total seeded with `u64::MAX`.
+#[bench]
+fn u256_sub(b: &mut Bencher) {
+	b.iter(|| {
+		// `black_box` hides the trip count from the optimizer.
+		let iterations = black_box(10000);
+		let seed = U256::from(::std::u64::MAX);
+		(0..iterations).fold(seed, |total, step| total.overflowing_sub(U256::from(step)).0)
+	});
+}
+
+/// Benchmarks a chain of `U256::overflowing_mul` calls over `0..10000`.
+///
+/// Each factor is forced odd with `| 1`. Folding over the raw range would
+/// multiply by `0` on the very first step, leaving the accumulator at zero so
+/// that every later iteration measures `0 * k`. Because the seed (12345) and
+/// all factors are odd, the wrapped product stays odd and therefore non-zero.
+#[bench]
+fn u256_mul(b: &mut Bencher) {
+	b.iter(|| {
+		let n = black_box(10000);
+		(0..n).fold(U256([12345u64, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_mul(U256::from(new | 1)).0 })
+	});
+}
+
+
+/// Benchmarks a chain of `U128::overflowing_mul` calls over `0..10000`.
+///
+/// Each factor is forced odd with `| 1`. Folding over the raw range would
+/// multiply by `0` on the very first step, leaving the accumulator at zero so
+/// that every later iteration measures `0 * k`. Because the seed (12345) and
+/// all factors are odd, the wrapped product stays odd and therefore non-zero.
+#[bench]
+fn u128_mul(b: &mut Bencher) {
+	b.iter(|| {
+		let n = black_box(10000);
+		(0..n).fold(U128([12345u64, 0u64]), |old, new| { old.overflowing_mul(U128::from(new | 1)).0 })
+	});
+}
+
diff --git a/util/src/lib.rs b/util/src/lib.rs
@@ -16,6 +16,7 @@
 
 #![warn(missing_docs)]
 #![cfg_attr(feature="dev", feature(plugin))]
+#![cfg_attr(feature="dev", feature(asm))]
 #![cfg_attr(feature="dev", plugin(clippy))]
 
 // Clippy settings

diff --git a/util/src/uint.rs b/util/src/uint.rs
@@ -51,6 +51,232 @@ macro_rules! impl_map_from {
 	}
 }
 
+// Portable `uint_overflowing_add!`, selected whenever the build is not an
+// x86_64 build with the "dev" feature: every type is forwarded unchanged to
+// `uint_overflowing_add_reg!`.
+#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
+macro_rules! uint_overflowing_add {
+	($name:ident, $n_words:expr, $lhs: expr, $rhs: expr) => (
+		uint_overflowing_add_reg!($name, $n_words, $lhs, $rhs)
+	)
+}
+
+// Pure-Rust overflowing addition for a `$name` wrapping `[u64; $n_words]`.
+//
+// Evaluates to `(sum, overflowed)`. Limbs are added pairwise with wrapping
+// arithmetic; a carry out of limb `i` is recorded as a `1` in limb `i + 1` of
+// a separate carry value, which is then added in through
+// `$name::overflowing_add`. A carry out of the last limb, or an overflow
+// reported by that follow-up addition, sets the flag.
+macro_rules! uint_overflowing_add_reg {
+	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
+		let $name(ref lhs) = $self_expr;
+		let $name(ref rhs) = $other;
+		let mut sum = [0u64; $n_words];
+		let mut carries = [0u64; $n_words];
+		let mut has_carry = false;
+		let mut overflowed = false;
+
+		for limb in 0..$n_words {
+			sum[limb] = lhs[limb].wrapping_add(rhs[limb]);
+
+			// A limb sum that did not wrap is never smaller than its operand.
+			if sum[limb] >= lhs[limb] {
+				continue;
+			}
+			if limb + 1 < $n_words {
+				carries[limb + 1] = 1;
+				has_carry = true;
+			} else {
+				overflowed = true;
+			}
+		}
+
+		if !has_carry {
+			($name(sum), overflowed)
+		} else {
+			let total = overflowing!($name(sum).overflowing_add($name(carries)), overflowed);
+			(total, overflowed)
+		}
+	})
+}
+
+
+// x86_64 "dev" build of `uint_overflowing_add!`.
+//
+// The `U256` arm adds the four 64-bit limbs with one `add` followed by three
+// `adc` instructions and reads the final carry back with `setc`, evaluating
+// to `(sum, overflowed)`. Every other type falls through to
+// `uint_overflowing_add_reg!`.
+#[cfg(all(feature="dev", target_arch = "x86_64"))]
+macro_rules! uint_overflowing_add {
+	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let U256(ref me) = $self_expr;
+		let U256(ref you) = $other;
+		let mut result = [0u64; 4];
+		let overflow: u8;
+
+		// SAFETY: the assembly only touches its declared register operands and
+		// reads the four `you` limbs through "m" operands; it performs no
+		// other memory access.
+		unsafe {
+			// The lowest limb must use `add`, not `adc`: the carry flag holds
+			// an arbitrary value on entry and `adc` would fold it into the sum.
+			asm!("
+				add $9, %r8
+				adc $10, %r9
+				adc $11, %r10
+				adc $12, %r11
+				setc %al
+				"
+			: "={r8}"(result[0]), "={r9}"(result[1]), "={r10}"(result[2]), "={r11}"(result[3]), "={al}"(overflow)
+			: "{r8}"(me[0]), "{r9}"(me[1]), "{r10}"(me[2]), "{r11}"(me[3]),
+			  "m"(you[0]), "m"(you[1]), "m"(you[2]), "m"(you[3])
+			: "cc"
+			:
+			);
+		}
+		(U256(result), overflow != 0)
+	});
+	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
+		uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
+	)
+}
+
+// Portable `uint_overflowing_sub!`, selected whenever the build is not an
+// x86_64 build with the "dev" feature.
+//
+// Subtraction is performed as `self + (!other + 1)` (two's complement), with
+// the overflow flags of both additions discarded; the reported flag is simply
+// `self < other`.
+#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
+macro_rules! uint_overflowing_sub {
+	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let negated = overflowing!((!$other).overflowing_add(From::from(1u64)));
+		let difference = overflowing!($self_expr.overflowing_add(negated));
+		(difference, $self_expr < $other)
+	})
+}
+
+// x86_64 "dev" build of `uint_overflowing_sub!`.
+//
+// The `U256` arm subtracts the four 64-bit limbs with one `sub` followed by
+// three `sbb` instructions and reads the final borrow back with `setb`,
+// evaluating to `(difference, underflowed)`. Every other type uses the
+// two's-complement form `self + (!other + 1)` and reports `self < other`.
+#[cfg(all(feature="dev", target_arch = "x86_64"))]
+macro_rules! uint_overflowing_sub {
+	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let U256(ref me) = $self_expr;
+		let U256(ref you) = $other;
+		let mut result = [0u64; 4];
+		let overflow: u8;
+
+		// SAFETY: the assembly only touches its declared operands; the `you`
+		// limbs are read-only "mr" inputs and nothing else is accessed.
+		unsafe {
+			// * The lowest limb must use `sub`, not `sbb`: the carry flag
+			//   holds an arbitrary value on entry and `sbb` would subtract it.
+			// * The template refers to the operands as `$0`..`$3` because the
+			//   "=r" constraints let the register allocator choose them;
+			//   hard-coded register names would not match that choice.
+			asm!("
+				sub $9, $0
+				sbb $10, $1
+				sbb $11, $2
+				sbb $12, $3
+				setb %al
+				"
+			: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
+			: "0"(me[0]), "1"(me[1]), "2"(me[2]), "3"(me[3]),
+			  "mr"(you[0]), "mr"(you[1]), "mr"(you[2]), "mr"(you[3])
+			: "cc"
+			:
+			);
+		}
+		(U256(result), overflow != 0)
+	});
+	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
+		let res = overflowing!($self_expr.overflowing_add(res));
+		(res, $self_expr < $other)
+	})
+}
+
+// x86_64 "dev" build of `uint_overflowing_mul!`.
+//
+// The `U256` arm is a schoolbook multiplication on 64-bit limbs built on
+// `mulq`, evaluating to `(product mod 2^256, overflowed)`. Every other type
+// falls through to `uint_overflowing_mul_reg!`.
+//
+// With `a = self` and `b = other` split into limbs `a0..a3` / `b0..b3`:
+//
+// * The assembly accumulates every partial product `a_i * b_j` with
+//   `i + j <= 3` into the result limbs `$0..$3`. Anything that would land in
+//   limb 4 (the high half of a product added at limb 3, or a carry out of
+//   limb 3) is OR-ed into `$4`, so `$4 != 0` exactly when that partial sum
+//   does not fit in 256 bits. Carries are turned into an all-ones mask with
+//   `sbb %rax, %rax` before the OR, so the accumulator can never wrap back to
+//   zero.
+// * Partial products with `i + j >= 4` lie entirely above bit 255. They
+//   cannot affect the truncated result and overflow exactly when both limbs
+//   are non-zero, which is checked in plain Rust after the assembly.
+//
+// The flags are never saved across instructions, so the assembly does not
+// touch the stack.
+#[cfg(all(feature="dev", target_arch = "x86_64"))]
+macro_rules! uint_overflowing_mul {
+	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let U256(ref me) = $self_expr;
+		let U256(ref you) = $other;
+
+		let r0: u64;
+		let r1: u64;
+		let r2: u64;
+		let r3: u64;
+		// Full 64-bit accumulator: a narrower type would observe only part of
+		// the register and miss overflow bits.
+		let carried: u64;
+
+		// SAFETY: the assembly only reads its eight "m" inputs, writes its five
+		// early-clobber register outputs and uses the declared scratch
+		// registers `rax`/`rdx`; it performs no other memory access.
+		unsafe {
+			asm!("
+				mov $5, %rax
+				mulq $9
+				mov %rax, $0
+				mov %rdx, $1
+
+				mov $6, %rax
+				mulq $9
+				add %rax, $1
+				adc $$0, %rdx
+				mov %rdx, $2
+
+				mov $7, %rax
+				mulq $9
+				add %rax, $2
+				adc $$0, %rdx
+				mov %rdx, $3
+
+				mov $8, %rax
+				mulq $9
+				add %rax, $3
+				adc $$0, %rdx
+				mov %rdx, $4
+
+				mov $5, %rax
+				mulq $10
+				add %rax, $1
+				adc %rdx, $2
+				adc $$0, $3
+				sbb %rax, %rax
+				or %rax, $4
+
+				mov $6, %rax
+				mulq $10
+				add %rax, $2
+				adc %rdx, $3
+				sbb %rax, %rax
+				or %rax, $4
+
+				mov $7, %rax
+				mulq $10
+				add %rax, $3
+				adc $$0, %rdx
+				or %rdx, $4
+
+				mov $5, %rax
+				mulq $11
+				add %rax, $2
+				adc %rdx, $3
+				sbb %rax, %rax
+				or %rax, $4
+
+				mov $6, %rax
+				mulq $11
+				add %rax, $3
+				adc $$0, %rdx
+				or %rdx, $4
+
+				mov $5, %rax
+				mulq $12
+				add %rax, $3
+				adc $$0, %rdx
+				or %rdx, $4
+				"
+				: /* $0 */ "=&r"(r0), /* $1 */ "=&r"(r1), /* $2 */ "=&r"(r2),
+				  /* $3 */ "=&r"(r3), /* $4 */ "=&r"(carried)
+
+				: /* $5 */ "m"(me[0]), /* $6 */ "m"(me[1]), /* $7 */ "m"(me[2]),
+				  /* $8 */ "m"(me[3]), /* $9 */ "m"(you[0]), /* $10 */ "m"(you[1]),
+				  /* $11 */ "m"(you[2]), /* $12 */ "m"(you[3])
+				: "rax", "rdx", "cc"
+				:
+			);
+		}
+
+		// Limb pairs whose product starts at or above bit 256.
+		let high_overflow =
+			(me[3] != 0 && (you[1] | you[2] | you[3]) != 0) ||
+			(me[2] != 0 && (you[2] | you[3]) != 0) ||
+			(me[1] != 0 && you[3] != 0);
+
+		(U256([r0, r1, r2, r3]), carried != 0 || high_overflow)
+	});
+	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
+		uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
+	)
+}
+
+// Portable `uint_overflowing_mul!`, selected whenever the build is not an
+// x86_64 build with the "dev" feature: every type is forwarded unchanged to
+// `uint_overflowing_mul_reg!`.
+#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
+macro_rules! uint_overflowing_mul {
+	($name:ident, $n_words: expr, $lhs: expr, $rhs: expr) => (
+		uint_overflowing_mul_reg!($name, $n_words, $lhs, $rhs)
+	)
+}
+
+// Pure-Rust overflowing multiplication for a `$name` of `$n_words` 64-bit
+// limbs, evaluating to `(product, overflowed)`.
+//
+// `$other` is consumed 32 bits at a time: each 32-bit digit multiplies
+// `$self_expr` via `overflowing_mul_u32`, the partial product is shifted back
+// into position and added to the running total. An overflow reported by any
+// of those three steps sets the flag.
+macro_rules! uint_overflowing_mul_reg {
+	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let mut product = $name::from(0u64);
+		let mut overflowed = false;
+		// TODO: be more efficient about this
+		for digit in 0..(2 * $n_words) {
+			let shift = 32 * digit;
+			let multiplier = ($other >> shift).low_u32();
+			let partial = overflowing!($self_expr.overflowing_mul_u32(multiplier), overflowed);
+			let shifted = overflowing!(partial.overflowing_shl(shift as u32), overflowed);
+			product = overflowing!(product.overflowing_add(shifted), overflowed);
+		}
+		(product, overflowed)
+	})
+}
+
 macro_rules! overflowing {
 	($op: expr, $overflow: expr) => (
 		{
@@ -297,50 +523,20 @@ macro_rules! construct_uint {
 				(res, overflow)
 			}
 
+			/// Overflowing addition: returns the sum together with a flag
+			/// that is `true` when a carry left the most significant word.
+			/// The body is generated by `uint_overflowing_add!`, which uses
+			/// inline assembly for `U256` on x86_64 "dev" builds.
+			#[inline(always)]
 			fn overflowing_add(self, other: $name) -> ($name, bool) {
-				let $name(ref me) = self;
-				let $name(ref you) = other;
-				let mut ret = [0u64; $n_words];
-				let mut carry = [0u64; $n_words];
-				let mut b_carry = false;
-				let mut overflow = false;
-
-				for i in 0..$n_words {
-					ret[i] = me[i].wrapping_add(you[i]);
-
-					if ret[i] < me[i] {
-						if i < $n_words - 1 {
-							carry[i + 1] = 1;
-							b_carry = true;
-						} else {
-							overflow = true;
-						}
-					}
-				}
-				if b_carry {
-					let ret = overflowing!($name(ret).overflowing_add($name(carry)), overflow);
-					(ret, overflow)
-				} else {
-					($name(ret), overflow)
-				}
+				uint_overflowing_add!($name, $n_words, self, other)
 			}
 
+			/// Overflowing subtraction: returns the difference together with
+			/// a flag for `self < other`. The body is generated by
+			/// `uint_overflowing_sub!`.
+			#[inline(always)]
 			fn overflowing_sub(self, other: $name) -> ($name, bool) {
-				let res = overflowing!((!other).overflowing_add(From::from(1u64)));
-				let res = overflowing!(self.overflowing_add(res));
-				(res, self < other)
+				uint_overflowing_sub!($name, $n_words, self, other)
 			}
 
+			/// Overflowing multiplication: returns the product together with
+			/// an overflow flag. The body is generated by
+			/// `uint_overflowing_mul!`.
+			#[inline(always)]
 			fn overflowing_mul(self, other: $name) -> ($name, bool) {
-				let mut res = $name::from(0u64);
-				let mut overflow = false;
-				// TODO: be more efficient about this
-				for i in 0..(2 * $n_words) {
-					let v = overflowing!(self.overflowing_mul_u32((other >> (32 * i)).low_u32()), overflow);
-					let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow);
-					res = overflowing!(res.overflowing_add(res2), overflow);
-				}
-				(res, overflow)
+				uint_overflowing_mul!($name, $n_words, self, other)
 			}
 
 			fn overflowing_div(self, other: $name) -> ($name, bool) {
@@ -1171,8 +1367,6 @@ mod tests {
 		);
 	}
 
-
-
 	#[test]
 	#[should_panic]
 	pub fn uint256_mul_overflow_panic() {