diff --git a/.cirrus.yml b/.cirrus.yml index e5fb43c3d..0ed2be730 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -74,6 +74,7 @@ task: - env: {WIDEMUL: int64, RECOVERY: yes} - env: {WIDEMUL: int64, ECDH: yes, SCHNORRSIG: yes, EXPERIMENTAL: yes, ECDSA_S2C: yes, RANGEPROOF: yes, WHITELIST: yes, GENERATOR: yes, MUSIG: yes, ECDSAADAPTOR: yes, BPPP: yes} - env: {WIDEMUL: int128} + - env: {WIDEMUL: int128_struct} - env: {WIDEMUL: int128, RECOVERY: yes, SCHNORRSIG: yes} - env: {WIDEMUL: int128, ECDH: yes, SCHNORRSIG: yes, EXPERIMENTAL: yes, ECDSA_S2C: yes, RANGEPROOF: yes, WHITELIST: yes, GENERATOR: yes, MUSIG: yes, ECDSAADAPTOR: yes, BPPP: yes} - env: {WIDEMUL: int128, ASM: x86_64} @@ -268,20 +269,26 @@ task: ECDSAADAPTOR: yes BPPP: yes CTIMETEST: no + # Use a MinGW-w64 host to tell ./configure we're building for Windows. + # This will detect some MinGW-w64 tools but then make will need only + # the MSVC tools CC, AR and NM as specified below. + HOST: x86_64-w64-mingw32 + CC: /opt/msvc/bin/x64/cl + AR: /opt/msvc/bin/x64/lib + NM: /opt/msvc/bin/x64/dumpbin -symbols -headers # Set non-essential options that affect the CLI messages here. # (They depend on the user's taste, so we don't want to set them automatically in configure.ac.) CFLAGS: -nologo -diagnostics:caret LDFLAGS: -XCClinker -nologo -XCClinker -diagnostics:caret - # Use a MinGW-w64 host to tell ./configure we're building for Windows. - # This will detect some MinGW-w64 tools but then make will need only - # the MSVC tools CC, AR and NM as specified below. matrix: - name: "x86_64 (MSVC): Windows (Debian stable, Wine)" + - name: "x86_64 (MSVC): Windows (Debian stable, Wine, int128_struct)" env: - HOST: x86_64-w64-mingw32 - CC: /opt/msvc/bin/x64/cl - AR: /opt/msvc/bin/x64/lib - NM: /opt/msvc/bin/x64/dumpbin -symbols -headers + WIDEMUL: int128_struct + - name: "x86_64 (MSVC): Windows (Debian stable, Wine, int128_struct with __(u)mulh)" + env: + WIDEMUL: int128_struct + CPPFLAGS: -DSECP256K1_MSVC_MULH_TEST_OVERRIDE - name: "i686 (MSVC): Windows (Debian stable, Wine)" env: HOST: i686-w64-mingw32 @@ -346,6 +353,40 @@ task: - ./ci/cirrus.sh << : *CAT_LOGS +# Memory sanitizers +task: + << : *LINUX_CONTAINER + name: "MSan" + env: + ECDH: yes + RECOVERY: yes + SCHNORRSIG: yes + EXPERIMENTAL: yes + ECDSA_S2C: yes + GENERATOR: yes + RANGEPROOF: yes + WHITELIST: yes + MUSIG: yes + ECDSAADAPTOR: yes + BPPP: yes + CTIMETEST: no + CC: clang + SECP256K1_TEST_ITERS: 32 + ASM: no + container: + memory: 2G + matrix: + - env: + CFLAGS: "-fsanitize=memory -g" + - env: + ECMULTGENPRECISION: 2 + ECMULTWINDOW: 2 + CFLAGS: "-fsanitize=memory -g -O3" + << : *MERGE_BASE + test_script: + - ./ci/cirrus.sh + << : *CAT_LOGS + task: name: "C++ -fpermissive (entire project)" << : *LINUX_CONTAINER diff --git a/Makefile.am b/Makefile.am index 8d719d27d..c26ab2c65 100644 --- a/Makefile.am +++ b/Makefile.am @@ -50,6 +50,12 @@ noinst_HEADERS += src/precomputed_ecmult.h noinst_HEADERS += src/precomputed_ecmult_gen.h noinst_HEADERS += src/assumptions.h noinst_HEADERS += src/util.h +noinst_HEADERS += src/int128.h +noinst_HEADERS += src/int128_impl.h +noinst_HEADERS += src/int128_native.h +noinst_HEADERS += src/int128_native_impl.h +noinst_HEADERS += src/int128_struct.h +noinst_HEADERS += src/int128_struct_impl.h noinst_HEADERS += src/scratch.h noinst_HEADERS += src/scratch_impl.h noinst_HEADERS += src/selftest.h diff --git a/ci/cirrus.sh b/ci/cirrus.sh index 3b817d3c1..413fb1400 100755 --- a/ci/cirrus.sh +++ b/ci/cirrus.sh @@ -5,6 +5,27 @@ set -x export LC_ALL=C +# Print relevant CI environment to allow reproducing the job outside of CI. +print_environment() { + # Turn off -x because it messes up the output + set +x + # There are many ways to print variable names and their content. This one + # does not rely on bash. + for i in WERROR_CFLAGS MAKEFLAGS BUILD \ + ECMULTWINDOW ECMULTGENPRECISION ASM WIDEMUL WITH_VALGRIND EXTRAFLAGS \ + EXPERIMENTAL ECDH RECOVERY SCHNORRSIG \ + ECDSA_S2C GENERATOR RANGEPROOF WHITELIST MUSIG ECDSAADAPTOR BPPP \ + SECP256K1_TEST_ITERS BENCH SECP256K1_BENCH_ITERS CTIMETEST\ + EXAMPLES \ + WRAPPER_CMD CC AR NM HOST + do + eval 'printf "%s %s " "$i=\"${'"$i"'}\""' + done + echo "$0" + set -x +} +print_environment + # Start persistent wineserver if necessary. # This speeds up jobs with many invocations of wine (e.g., ./configure with MSVC) tremendously. case "$WRAPPER_CMD" in diff --git a/configure.ac b/configure.ac index 64cb8c896..d04a9106c 100644 --- a/configure.ac +++ b/configure.ac @@ -220,7 +220,11 @@ AC_ARG_ENABLE(reduced_surjection_proof_size, [SECP_SET_DEFAULT([use_reduced_surjection_proof_size], [no], [no])]) # Test-only override of the (autodetected by the C code) "widemul" setting. -# Legal values are int64 (for [u]int64_t), int128 (for [unsigned] __int128), and auto (the default). +# Legal values are: +# * int64 (for [u]int64_t), +# * int128 (for [unsigned] __int128), +# * int128_struct (for int128 implemented as a structure), +# * and auto (the default). AC_ARG_WITH([test-override-wide-multiply], [] ,[set_widemul=$withval], [set_widemul=auto]) AC_ARG_WITH([asm], [AS_HELP_STRING([--with-asm=x86_64|arm|no|auto], @@ -342,6 +346,9 @@ fi # Select wide multiplication implementation case $set_widemul in +int128_struct) + AC_DEFINE(USE_FORCE_WIDEMUL_INT128_STRUCT, 1, [Define this symbol to force the use of the structure for simulating (unsigned) int128 based wide multiplication]) + ;; int128) AC_DEFINE(USE_FORCE_WIDEMUL_INT128, 1, [Define this symbol to force the use of the (unsigned) __int128 based wide multiplication implementation]) ;; diff --git a/src/assumptions.h b/src/assumptions.h index 6dc527b28..8ed04209e 100644 --- a/src/assumptions.h +++ b/src/assumptions.h @@ -10,6 +10,9 @@ #include #include "util.h" +#if defined(SECP256K1_INT128_NATIVE) +#include "int128_native.h" +#endif /* This library, like most software, relies on a number of compiler implementation defined (but not undefined) behaviours. Although the behaviours we require are essentially universal we test them specifically here to @@ -55,7 +58,7 @@ struct secp256k1_assumption_checker { /* To int64_t. */ ((int64_t)(uint64_t)0xB123C456D789E012ULL == (int64_t)-(int64_t)0x4EDC3BA928761FEEULL) && -#if defined(SECP256K1_WIDEMUL_INT128) +#if defined(SECP256K1_INT128_NATIVE) ((int64_t)(((uint128_t)0xA1234567B8901234ULL << 64) + 0xC5678901D2345678ULL) == (int64_t)-(int64_t)0x3A9876FE2DCBA988ULL) && (((int64_t)(int128_t)(((uint128_t)0xB1C2D3E4F5A6B7C8ULL << 64) + 0xD9E0F1A2B3C4D5E6ULL)) == (int64_t)(uint64_t)0xD9E0F1A2B3C4D5E6ULL) && (((int64_t)(int128_t)(((uint128_t)0xABCDEF0123456789ULL << 64) + 0x0123456789ABCDEFULL)) == (int64_t)(uint64_t)0x0123456789ABCDEFULL) && @@ -71,7 +74,7 @@ struct secp256k1_assumption_checker { ((((int16_t)0xE9AC) >> 4) == (int16_t)(uint16_t)0xFE9A) && ((((int32_t)0x937C918A) >> 9) == (int32_t)(uint32_t)0xFFC9BE48) && ((((int64_t)0xA8B72231DF9CF4B9ULL) >> 19) == (int64_t)(uint64_t)0xFFFFF516E4463BF3ULL) && -#if defined(SECP256K1_WIDEMUL_INT128) +#if defined(SECP256K1_INT128_NATIVE) ((((int128_t)(((uint128_t)0xCD833A65684A0DBCULL << 64) + 0xB349312F71EA7637ULL)) >> 39) == (int128_t)(((uint128_t)0xFFFFFFFFFF9B0674ULL << 64) + 0xCAD0941B79669262ULL)) && #endif 1) * 2 - 1]; diff --git a/src/bench_whitelist.c b/src/bench_whitelist.c index 18bfa144d..6225271f5 100644 --- a/src/bench_whitelist.c +++ b/src/bench_whitelist.c @@ -11,6 +11,7 @@ #include "util.h" #include "bench.h" #include "hash_impl.h" +#include "int128_impl.h" #include "scalar_impl.h" #include "testrand_impl.h" diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h index bbc820c77..3776fe73f 100644 --- a/src/ecmult_impl.h +++ b/src/ecmult_impl.h @@ -200,9 +200,15 @@ static int secp256k1_ecmult_wnaf(int *wnaf, int len, const secp256k1_scalar *a, bit += now; } #ifdef VERIFY - CHECK(carry == 0); - while (bit < 256) { - CHECK(secp256k1_scalar_get_bits(&s, bit++, 1) == 0); + { + int verify_bit = bit; + + VERIFY_CHECK(carry == 0); + + while (verify_bit < 256) { + VERIFY_CHECK(secp256k1_scalar_get_bits(&s, verify_bit, 1) == 0); + verify_bit++; + } } #endif return last_set_bit + 1; diff --git a/src/field_5x52_int128_impl.h b/src/field_5x52_int128_impl.h index 0ed6118cc..18567b95f 100644 --- a/src/field_5x52_int128_impl.h +++ b/src/field_5x52_int128_impl.h @@ -9,14 +9,18 @@ #include +#include "int128.h" + #ifdef VERIFY #define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) +#define VERIFY_BITS_128(x, n) VERIFY_CHECK(secp256k1_u128_check_bits((x), (n))) #else #define VERIFY_BITS(x, n) do { } while(0) +#define VERIFY_BITS_128(x, n) do { } while(0) #endif SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t *a, const uint64_t * SECP256K1_RESTRICT b) { - uint128_t c, d; + secp256k1_uint128 c, d; uint64_t t3, t4, tx, u0; uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4]; const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL; @@ -40,121 +44,119 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint64_t *r, const uint64_t * Note that [x 0 0 0 0 0] = [x*R]. */ - d = (uint128_t)a0 * b[3] - + (uint128_t)a1 * b[2] - + (uint128_t)a2 * b[1] - + (uint128_t)a3 * b[0]; - VERIFY_BITS(d, 114); + secp256k1_u128_mul(&d, a0, b[3]); + secp256k1_u128_accum_mul(&d, a1, b[2]); + secp256k1_u128_accum_mul(&d, a2, b[1]); + secp256k1_u128_accum_mul(&d, a3, b[0]); + VERIFY_BITS_128(&d, 114); /* [d 0 0 0] = [p3 0 0 0] */ - c = (uint128_t)a4 * b[4]; - VERIFY_BITS(c, 112); + secp256k1_u128_mul(&c, a4, b[4]); + VERIFY_BITS_128(&c, 112); /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - d += (uint128_t)R * (uint64_t)c; c >>= 64; - VERIFY_BITS(d, 115); - VERIFY_BITS(c, 48); + secp256k1_u128_accum_mul(&d, R, secp256k1_u128_to_u64(&c)); secp256k1_u128_rshift(&c, 64); + VERIFY_BITS_128(&d, 115); + VERIFY_BITS_128(&c, 48); /* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - t3 = d & M; d >>= 52; + t3 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); VERIFY_BITS(t3, 52); - VERIFY_BITS(d, 63); + VERIFY_BITS_128(&d, 63); /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - d += (uint128_t)a0 * b[4] - + (uint128_t)a1 * b[3] - + (uint128_t)a2 * b[2] - + (uint128_t)a3 * b[1] - + (uint128_t)a4 * b[0]; - VERIFY_BITS(d, 115); + secp256k1_u128_accum_mul(&d, a0, b[4]); + secp256k1_u128_accum_mul(&d, a1, b[3]); + secp256k1_u128_accum_mul(&d, a2, b[2]); + secp256k1_u128_accum_mul(&d, a3, b[1]); + secp256k1_u128_accum_mul(&d, a4, b[0]); + VERIFY_BITS_128(&d, 115); /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - d += (uint128_t)(R << 12) * (uint64_t)c; - VERIFY_BITS(d, 116); + secp256k1_u128_accum_mul(&d, R << 12, secp256k1_u128_to_u64(&c)); + VERIFY_BITS_128(&d, 116); /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - t4 = d & M; d >>= 52; + t4 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); VERIFY_BITS(t4, 52); - VERIFY_BITS(d, 64); + VERIFY_BITS_128(&d, 64); /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ tx = (t4 >> 48); t4 &= (M >> 4); VERIFY_BITS(tx, 4); VERIFY_BITS(t4, 48); /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - c = (uint128_t)a0 * b[0]; - VERIFY_BITS(c, 112); + secp256k1_u128_mul(&c, a0, b[0]); + VERIFY_BITS_128(&c, 112); /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */ - d += (uint128_t)a1 * b[4] - + (uint128_t)a2 * b[3] - + (uint128_t)a3 * b[2] - + (uint128_t)a4 * b[1]; - VERIFY_BITS(d, 115); + secp256k1_u128_accum_mul(&d, a1, b[4]); + secp256k1_u128_accum_mul(&d, a2, b[3]); + secp256k1_u128_accum_mul(&d, a3, b[2]); + secp256k1_u128_accum_mul(&d, a4, b[1]); + VERIFY_BITS_128(&d, 115); /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - u0 = d & M; d >>= 52; + u0 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); VERIFY_BITS(u0, 52); - VERIFY_BITS(d, 63); + VERIFY_BITS_128(&d, 63); /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ u0 = (u0 << 4) | tx; VERIFY_BITS(u0, 56); /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - c += (uint128_t)u0 * (R >> 4); - VERIFY_BITS(c, 115); + secp256k1_u128_accum_mul(&c, u0, R >> 4); + VERIFY_BITS_128(&c, 115); /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - r[0] = c & M; c >>= 52; + r[0] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); VERIFY_BITS(r[0], 52); - VERIFY_BITS(c, 61); + VERIFY_BITS_128(&c, 61); /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */ - c += (uint128_t)a0 * b[1] - + (uint128_t)a1 * b[0]; - VERIFY_BITS(c, 114); + secp256k1_u128_accum_mul(&c, a0, b[1]); + secp256k1_u128_accum_mul(&c, a1, b[0]); + VERIFY_BITS_128(&c, 114); /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */ - d += (uint128_t)a2 * b[4] - + (uint128_t)a3 * b[3] - + (uint128_t)a4 * b[2]; - VERIFY_BITS(d, 114); + secp256k1_u128_accum_mul(&d, a2, b[4]); + secp256k1_u128_accum_mul(&d, a3, b[3]); + secp256k1_u128_accum_mul(&d, a4, b[2]); + VERIFY_BITS_128(&d, 114); /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - c += (d & M) * R; d >>= 52; - VERIFY_BITS(c, 115); - VERIFY_BITS(d, 62); + secp256k1_u128_accum_mul(&c, secp256k1_u128_to_u64(&d) & M, R); secp256k1_u128_rshift(&d, 52); + VERIFY_BITS_128(&c, 115); + VERIFY_BITS_128(&d, 62); /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - r[1] = c & M; c >>= 52; + r[1] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); VERIFY_BITS(r[1], 52); - VERIFY_BITS(c, 63); + VERIFY_BITS_128(&c, 63); /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - c += (uint128_t)a0 * b[2] - + (uint128_t)a1 * b[1] - + (uint128_t)a2 * b[0]; - VERIFY_BITS(c, 114); + secp256k1_u128_accum_mul(&c, a0, b[2]); + secp256k1_u128_accum_mul(&c, a1, b[1]); + secp256k1_u128_accum_mul(&c, a2, b[0]); + VERIFY_BITS_128(&c, 114); /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */ - d += (uint128_t)a3 * b[4] - + (uint128_t)a4 * b[3]; - VERIFY_BITS(d, 114); + secp256k1_u128_accum_mul(&d, a3, b[4]); + secp256k1_u128_accum_mul(&d, a4, b[3]); + VERIFY_BITS_128(&d, 114); /* [d 0 0 t4 t3 c t1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - c += (uint128_t)R * (uint64_t)d; d >>= 64; - VERIFY_BITS(c, 115); - VERIFY_BITS(d, 50); + secp256k1_u128_accum_mul(&c, R, secp256k1_u128_to_u64(&d)); secp256k1_u128_rshift(&d, 64); + VERIFY_BITS_128(&c, 115); + VERIFY_BITS_128(&d, 50); /* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[2] = c & M; c >>= 52; + r[2] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); VERIFY_BITS(r[2], 52); - VERIFY_BITS(c, 63); + VERIFY_BITS_128(&c, 63); /* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - c += (uint128_t)(R << 12) * (uint64_t)d + t3; - VERIFY_BITS(c, 100); + secp256k1_u128_accum_mul(&c, R << 12, secp256k1_u128_to_u64(&d)); + secp256k1_u128_accum_u64(&c, t3); + VERIFY_BITS_128(&c, 100); /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[3] = c & M; c >>= 52; + r[3] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); VERIFY_BITS(r[3], 52); - VERIFY_BITS(c, 48); + VERIFY_BITS_128(&c, 48); /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - c += t4; - VERIFY_BITS(c, 49); - /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[4] = c; + r[4] = secp256k1_u128_to_u64(&c) + t4; VERIFY_BITS(r[4], 49); /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ } SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t *a) { - uint128_t c, d; + secp256k1_uint128 c, d; uint64_t a0 = a[0], a1 = a[1], a2 = a[2], a3 = a[3], a4 = a[4]; int64_t t3, t4, tx, u0; const uint64_t M = 0xFFFFFFFFFFFFFULL, R = 0x1000003D10ULL; @@ -170,107 +172,105 @@ SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint64_t *r, const uint64_t * Note that [x 0 0 0 0 0] = [x*R]. */ - d = (uint128_t)(a0*2) * a3 - + (uint128_t)(a1*2) * a2; - VERIFY_BITS(d, 114); + secp256k1_u128_mul(&d, a0*2, a3); + secp256k1_u128_accum_mul(&d, a1*2, a2); + VERIFY_BITS_128(&d, 114); /* [d 0 0 0] = [p3 0 0 0] */ - c = (uint128_t)a4 * a4; - VERIFY_BITS(c, 112); + secp256k1_u128_mul(&c, a4, a4); + VERIFY_BITS_128(&c, 112); /* [c 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - d += (uint128_t)R * (uint64_t)c; c >>= 64; - VERIFY_BITS(d, 115); - VERIFY_BITS(c, 48); + secp256k1_u128_accum_mul(&d, R, secp256k1_u128_to_u64(&c)); secp256k1_u128_rshift(&c, 64); + VERIFY_BITS_128(&d, 115); + VERIFY_BITS_128(&c, 48); /* [(c<<12) 0 0 0 0 0 d 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ - t3 = d & M; d >>= 52; + t3 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); VERIFY_BITS(t3, 52); - VERIFY_BITS(d, 63); + VERIFY_BITS_128(&d, 63); /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 0 p3 0 0 0] */ a4 *= 2; - d += (uint128_t)a0 * a4 - + (uint128_t)(a1*2) * a3 - + (uint128_t)a2 * a2; - VERIFY_BITS(d, 115); + secp256k1_u128_accum_mul(&d, a0, a4); + secp256k1_u128_accum_mul(&d, a1*2, a3); + secp256k1_u128_accum_mul(&d, a2, a2); + VERIFY_BITS_128(&d, 115); /* [(c<<12) 0 0 0 0 d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - d += (uint128_t)(R << 12) * (uint64_t)c; - VERIFY_BITS(d, 116); + secp256k1_u128_accum_mul(&d, R << 12, secp256k1_u128_to_u64(&c)); + VERIFY_BITS_128(&d, 116); /* [d t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - t4 = d & M; d >>= 52; + t4 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); VERIFY_BITS(t4, 52); - VERIFY_BITS(d, 64); + VERIFY_BITS_128(&d, 64); /* [d t4 t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ tx = (t4 >> 48); t4 &= (M >> 4); VERIFY_BITS(tx, 4); VERIFY_BITS(t4, 48); /* [d t4+(tx<<48) t3 0 0 0] = [p8 0 0 0 p4 p3 0 0 0] */ - c = (uint128_t)a0 * a0; - VERIFY_BITS(c, 112); + secp256k1_u128_mul(&c, a0, a0); + VERIFY_BITS_128(&c, 112); /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 0 p4 p3 0 0 p0] */ - d += (uint128_t)a1 * a4 - + (uint128_t)(a2*2) * a3; - VERIFY_BITS(d, 114); + secp256k1_u128_accum_mul(&d, a1, a4); + secp256k1_u128_accum_mul(&d, a2*2, a3); + VERIFY_BITS_128(&d, 114); /* [d t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - u0 = d & M; d >>= 52; + u0 = secp256k1_u128_to_u64(&d) & M; secp256k1_u128_rshift(&d, 52); VERIFY_BITS(u0, 52); - VERIFY_BITS(d, 62); + VERIFY_BITS_128(&d, 62); /* [d u0 t4+(tx<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ /* [d 0 t4+(tx<<48)+(u0<<52) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ u0 = (u0 << 4) | tx; VERIFY_BITS(u0, 56); /* [d 0 t4+(u0<<48) t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - c += (uint128_t)u0 * (R >> 4); - VERIFY_BITS(c, 113); + secp256k1_u128_accum_mul(&c, u0, R >> 4); + VERIFY_BITS_128(&c, 113); /* [d 0 t4 t3 0 0 c] = [p8 0 0 p5 p4 p3 0 0 p0] */ - r[0] = c & M; c >>= 52; + r[0] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); VERIFY_BITS(r[0], 52); - VERIFY_BITS(c, 61); + VERIFY_BITS_128(&c, 61); /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 0 p0] */ a0 *= 2; - c += (uint128_t)a0 * a1; - VERIFY_BITS(c, 114); + secp256k1_u128_accum_mul(&c, a0, a1); + VERIFY_BITS_128(&c, 114); /* [d 0 t4 t3 0 c r0] = [p8 0 0 p5 p4 p3 0 p1 p0] */ - d += (uint128_t)a2 * a4 - + (uint128_t)a3 * a3; - VERIFY_BITS(d, 114); + secp256k1_u128_accum_mul(&d, a2, a4); + secp256k1_u128_accum_mul(&d, a3, a3); + VERIFY_BITS_128(&d, 114); /* [d 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - c += (d & M) * R; d >>= 52; - VERIFY_BITS(c, 115); - VERIFY_BITS(d, 62); + secp256k1_u128_accum_mul(&c, secp256k1_u128_to_u64(&d) & M, R); secp256k1_u128_rshift(&d, 52); + VERIFY_BITS_128(&c, 115); + VERIFY_BITS_128(&d, 62); /* [d 0 0 t4 t3 0 c r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - r[1] = c & M; c >>= 52; + r[1] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); VERIFY_BITS(r[1], 52); - VERIFY_BITS(c, 63); + VERIFY_BITS_128(&c, 63); /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 0 p1 p0] */ - c += (uint128_t)a0 * a2 - + (uint128_t)a1 * a1; - VERIFY_BITS(c, 114); + secp256k1_u128_accum_mul(&c, a0, a2); + secp256k1_u128_accum_mul(&c, a1, a1); + VERIFY_BITS_128(&c, 114); /* [d 0 0 t4 t3 c r1 r0] = [p8 0 p6 p5 p4 p3 p2 p1 p0] */ - d += (uint128_t)a3 * a4; - VERIFY_BITS(d, 114); + secp256k1_u128_accum_mul(&d, a3, a4); + VERIFY_BITS_128(&d, 114); /* [d 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - c += (uint128_t)R * (uint64_t)d; d >>= 64; - VERIFY_BITS(c, 115); - VERIFY_BITS(d, 50); + secp256k1_u128_accum_mul(&c, R, secp256k1_u128_to_u64(&d)); secp256k1_u128_rshift(&d, 64); + VERIFY_BITS_128(&c, 115); + VERIFY_BITS_128(&d, 50); /* [(d<<12) 0 0 0 t4 t3 c r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[2] = c & M; c >>= 52; + r[2] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); VERIFY_BITS(r[2], 52); - VERIFY_BITS(c, 63); + VERIFY_BITS_128(&c, 63); /* [(d<<12) 0 0 0 t4 t3+c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - c += (uint128_t)(R << 12) * (uint64_t)d + t3; - VERIFY_BITS(c, 100); + secp256k1_u128_accum_mul(&c, R << 12, secp256k1_u128_to_u64(&d)); + secp256k1_u128_accum_u64(&c, t3); + VERIFY_BITS_128(&c, 100); /* [t4 c r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[3] = c & M; c >>= 52; + r[3] = secp256k1_u128_to_u64(&c) & M; secp256k1_u128_rshift(&c, 52); VERIFY_BITS(r[3], 52); - VERIFY_BITS(c, 48); + VERIFY_BITS_128(&c, 48); /* [t4+c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - c += t4; - VERIFY_BITS(c, 49); - /* [c r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ - r[4] = c; + r[4] = secp256k1_u128_to_u64(&c) + t4; VERIFY_BITS(r[4], 49); /* [r4 r3 r2 r1 r0] = [p8 p7 p6 p5 p4 p3 p2 p1 p0] */ } diff --git a/src/int128.h b/src/int128.h new file mode 100644 index 000000000..84d969a23 --- /dev/null +++ b/src/int128.h @@ -0,0 +1,85 @@ +#ifndef SECP256K1_INT128_H +#define SECP256K1_INT128_H + +#include "util.h" + +#if defined(SECP256K1_WIDEMUL_INT128) +# if defined(SECP256K1_INT128_NATIVE) +# include "int128_native.h" +# elif defined(SECP256K1_INT128_STRUCT) +# include "int128_struct.h" +# else +# error "Please select int128 implementation" +# endif + +/* Construct an unsigned 128-bit value from a high and a low 64-bit value. */ +static SECP256K1_INLINE void secp256k1_u128_load(secp256k1_uint128 *r, uint64_t hi, uint64_t lo); + +/* Multiply two unsigned 64-bit values a and b and write the result to r. */ +static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b); + +/* Multiply two unsigned 64-bit values a and b and add the result to r. + * The final result is taken modulo 2^128. + */ +static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b); + +/* Add an unsigned 64-bit value a to r. + * The final result is taken modulo 2^128. + */ +static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a); + +/* Unsigned (logical) right shift. + * Non-constant time in n. + */ +static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n); + +/* Return the low 64-bits of a 128-bit value as an unsigned 64-bit value. */ +static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a); + +/* Return the high 64-bits of a 128-bit value as an unsigned 64-bit value. */ +static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a); + +/* Write an unsigned 64-bit value to r. */ +static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a); + +/* Tests if r is strictly less than to 2^n. + * n must be strictly less than 128. + */ +static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n); + +/* Construct an signed 128-bit value from a high and a low 64-bit value. */ +static SECP256K1_INLINE void secp256k1_i128_load(secp256k1_int128 *r, int64_t hi, uint64_t lo); + +/* Multiply two signed 64-bit values a and b and write the result to r. */ +static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b); + +/* Multiply two signed 64-bit values a and b and add the result to r. + * Overflow or underflow from the addition is undefined behaviour. + */ +static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b); + +/* Compute a*d - b*c from signed 64-bit values and write the result to r. */ +static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d); + +/* Signed (arithmetic) right shift. + * Non-constant time in b. + */ +static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int b); + +/* Return the low 64-bits of a 128-bit value interpreted as an signed 64-bit value. */ +static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a); + +/* Write a signed 64-bit value to r. */ +static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a); + +/* Compare two 128-bit values for equality. */ +static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b); + +/* Tests if r is equal to 2^n. + * n must be strictly less than 127. + */ +static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n); + +#endif + +#endif diff --git a/src/int128_impl.h b/src/int128_impl.h new file mode 100644 index 000000000..cfc573408 --- /dev/null +++ b/src/int128_impl.h @@ -0,0 +1,18 @@ +#ifndef SECP256K1_INT128_IMPL_H +#define SECP256K1_INT128_IMPL_H + +#include "util.h" + +#include "int128.h" + +#if defined(SECP256K1_WIDEMUL_INT128) +# if defined(SECP256K1_INT128_NATIVE) +# include "int128_native_impl.h" +# elif defined(SECP256K1_INT128_STRUCT) +# include "int128_struct_impl.h" +# else +# error "Please select int128 implementation" +# endif +#endif + +#endif diff --git a/src/int128_native.h b/src/int128_native.h new file mode 100644 index 000000000..7c97aafc7 --- /dev/null +++ b/src/int128_native.h @@ -0,0 +1,19 @@ +#ifndef SECP256K1_INT128_NATIVE_H +#define SECP256K1_INT128_NATIVE_H + +#include +#include "util.h" + +#if !defined(UINT128_MAX) && defined(__SIZEOF_INT128__) +SECP256K1_GNUC_EXT typedef unsigned __int128 uint128_t; +SECP256K1_GNUC_EXT typedef __int128 int128_t; +# define UINT128_MAX ((uint128_t)(-1)) +# define INT128_MAX ((int128_t)(UINT128_MAX >> 1)) +# define INT128_MIN (-INT128_MAX - 1) +/* No (U)INT128_C macros because compilers providing __int128 do not support 128-bit literals. */ +#endif + +typedef uint128_t secp256k1_uint128; +typedef int128_t secp256k1_int128; + +#endif diff --git a/src/int128_native_impl.h b/src/int128_native_impl.h new file mode 100644 index 000000000..e4b7f4106 --- /dev/null +++ b/src/int128_native_impl.h @@ -0,0 +1,87 @@ +#ifndef SECP256K1_INT128_NATIVE_IMPL_H +#define SECP256K1_INT128_NATIVE_IMPL_H + +#include "int128.h" + +static SECP256K1_INLINE void secp256k1_u128_load(secp256k1_uint128 *r, uint64_t hi, uint64_t lo) { + *r = (((uint128_t)hi) << 64) + lo; +} + +static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) { + *r = (uint128_t)a * b; +} + +static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) { + *r += (uint128_t)a * b; +} + +static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a) { + *r += a; +} + +static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n) { + VERIFY_CHECK(n < 128); + *r >>= n; +} + +static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a) { + return (uint64_t)(*a); +} + +static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a) { + return (uint64_t)(*a >> 64); +} + +static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a) { + *r = a; +} + +static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n) { + VERIFY_CHECK(n < 128); + return (*r >> n == 0); +} + +static SECP256K1_INLINE void secp256k1_i128_load(secp256k1_int128 *r, int64_t hi, uint64_t lo) { + *r = (((uint128_t)(uint64_t)hi) << 64) + lo; +} + +static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b) { + *r = (int128_t)a * b; +} + +static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b) { + int128_t ab = (int128_t)a * b; + VERIFY_CHECK(0 <= ab ? *r <= INT128_MAX - ab : INT128_MIN - ab <= *r); + *r += ab; +} + +static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d) { + int128_t ad = (int128_t)a * d; + int128_t bc = (int128_t)b * c; + VERIFY_CHECK(0 <= bc ? INT128_MIN + bc <= ad : ad <= INT128_MAX + bc); + *r = ad - bc; +} + +static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int n) { + VERIFY_CHECK(n < 128); + *r >>= n; +} + +static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a) { + return *a; +} + +static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a) { + *r = a; +} + +static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b) { + return *a == *b; +} + +static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n) { + VERIFY_CHECK(n < 127); + return (*r == (int128_t)1 << n); +} + +#endif diff --git a/src/int128_struct.h b/src/int128_struct.h new file mode 100644 index 000000000..6156f82cc --- /dev/null +++ b/src/int128_struct.h @@ -0,0 +1,14 @@ +#ifndef SECP256K1_INT128_STRUCT_H +#define SECP256K1_INT128_STRUCT_H + +#include +#include "util.h" + +typedef struct { + uint64_t lo; + uint64_t hi; +} secp256k1_uint128; + +typedef secp256k1_uint128 secp256k1_int128; + +#endif diff --git a/src/int128_struct_impl.h b/src/int128_struct_impl.h new file mode 100644 index 000000000..b5f8fb7b6 --- /dev/null +++ b/src/int128_struct_impl.h @@ -0,0 +1,192 @@ +#ifndef SECP256K1_INT128_STRUCT_IMPL_H +#define SECP256K1_INT128_STRUCT_IMPL_H + +#include "int128.h" + +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) /* MSVC */ +# include +# if defined(_M_ARM64) || defined(SECP256K1_MSVC_MULH_TEST_OVERRIDE) +/* On ARM64 MSVC, use __(u)mulh for the upper half of 64x64 multiplications. + (Define SECP256K1_MSVC_MULH_TEST_OVERRIDE to test this code path on X64, + which supports both __(u)mulh and _umul128.) */ +# if defined(SECP256K1_MSVC_MULH_TEST_OVERRIDE) +# pragma message(__FILE__ ": SECP256K1_MSVC_MULH_TEST_OVERRIDE is defined, forcing use of __(u)mulh.") +# endif +static SECP256K1_INLINE uint64_t secp256k1_umul128(uint64_t a, uint64_t b, uint64_t* hi) { + *hi = __umulh(a, b); + return a * b; +} + +static SECP256K1_INLINE int64_t secp256k1_mul128(int64_t a, int64_t b, int64_t* hi) { + *hi = __mulh(a, b); + return (uint64_t)a * (uint64_t)b; +} +# else +/* On x84_64 MSVC, use native _(u)mul128 for 64x64->128 multiplications. */ +# define secp256k1_umul128 _umul128 +# define secp256k1_mul128 _mul128 +# endif +#else +/* On other systems, emulate 64x64->128 multiplications using 32x32->64 multiplications. */ +static SECP256K1_INLINE uint64_t secp256k1_umul128(uint64_t a, uint64_t b, uint64_t* hi) { + uint64_t ll = (uint64_t)(uint32_t)a * (uint32_t)b; + uint64_t lh = (uint32_t)a * (b >> 32); + uint64_t hl = (a >> 32) * (uint32_t)b; + uint64_t hh = (a >> 32) * (b >> 32); + uint64_t mid34 = (ll >> 32) + (uint32_t)lh + (uint32_t)hl; + *hi = hh + (lh >> 32) + (hl >> 32) + (mid34 >> 32); + return (mid34 << 32) + (uint32_t)ll; +} + +static SECP256K1_INLINE int64_t secp256k1_mul128(int64_t a, int64_t b, int64_t* hi) { + uint64_t ll = (uint64_t)(uint32_t)a * (uint32_t)b; + int64_t lh = (uint32_t)a * (b >> 32); + int64_t hl = (a >> 32) * (uint32_t)b; + int64_t hh = (a >> 32) * (b >> 32); + uint64_t mid34 = (ll >> 32) + (uint32_t)lh + (uint32_t)hl; + *hi = hh + (lh >> 32) + (hl >> 32) + (mid34 >> 32); + return (mid34 << 32) + (uint32_t)ll; +} +#endif + +static SECP256K1_INLINE void secp256k1_u128_load(secp256k1_uint128 *r, uint64_t hi, uint64_t lo) { + r->hi = hi; + r->lo = lo; +} + +static SECP256K1_INLINE void secp256k1_u128_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) { + r->lo = secp256k1_umul128(a, b, &r->hi); +} + +static SECP256K1_INLINE void secp256k1_u128_accum_mul(secp256k1_uint128 *r, uint64_t a, uint64_t b) { + uint64_t lo, hi; + lo = secp256k1_umul128(a, b, &hi); + r->lo += lo; + r->hi += hi + (r->lo < lo); +} + +static SECP256K1_INLINE void secp256k1_u128_accum_u64(secp256k1_uint128 *r, uint64_t a) { + r->lo += a; + r->hi += r->lo < a; +} + +/* Unsigned (logical) right shift. + * Non-constant time in n. + */ +static SECP256K1_INLINE void secp256k1_u128_rshift(secp256k1_uint128 *r, unsigned int n) { + VERIFY_CHECK(n < 128); + if (n >= 64) { + r->lo = r->hi >> (n-64); + r->hi = 0; + } else if (n > 0) { + r->lo = ((1U * r->hi) << (64-n)) | r->lo >> n; + r->hi >>= n; + } +} + +static SECP256K1_INLINE uint64_t secp256k1_u128_to_u64(const secp256k1_uint128 *a) { + return a->lo; +} + +static SECP256K1_INLINE uint64_t secp256k1_u128_hi_u64(const secp256k1_uint128 *a) { + return a->hi; +} + +static SECP256K1_INLINE void secp256k1_u128_from_u64(secp256k1_uint128 *r, uint64_t a) { + r->hi = 0; + r->lo = a; +} + +static SECP256K1_INLINE int secp256k1_u128_check_bits(const secp256k1_uint128 *r, unsigned int n) { + VERIFY_CHECK(n < 128); + return n >= 64 ? r->hi >> (n - 64) == 0 + : r->hi == 0 && r->lo >> n == 0; +} + +static SECP256K1_INLINE void secp256k1_i128_load(secp256k1_int128 *r, int64_t hi, uint64_t lo) { + r->hi = hi; + r->lo = lo; +} + +static SECP256K1_INLINE void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b) { + int64_t hi; + r->lo = (uint64_t)secp256k1_mul128(a, b, &hi); + r->hi = (uint64_t)hi; +} + +static SECP256K1_INLINE void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b) { + int64_t hi; + uint64_t lo = (uint64_t)secp256k1_mul128(a, b, &hi); + r->lo += lo; + hi += r->lo < lo; + /* Verify no overflow. + * If r represents a positive value (the sign bit is not set) and the value we are adding is a positive value (the sign bit is not set), + * then we require that the resulting value also be positive (the sign bit is not set). + * Note that (X <= Y) means (X implies Y) when X and Y are boolean values (i.e. 0 or 1). + */ + VERIFY_CHECK((r->hi <= 0x7fffffffffffffffu && (uint64_t)hi <= 0x7fffffffffffffffu) <= (r->hi + (uint64_t)hi <= 0x7fffffffffffffffu)); + /* Verify no underflow. + * If r represents a negative value (the sign bit is set) and the value we are adding is a negative value (the sign bit is set), + * then we require that the resulting value also be negative (the sign bit is set). + */ + VERIFY_CHECK((r->hi > 0x7fffffffffffffffu && (uint64_t)hi > 0x7fffffffffffffffu) <= (r->hi + (uint64_t)hi > 0x7fffffffffffffffu)); + r->hi += hi; +} + +static SECP256K1_INLINE void secp256k1_i128_dissip_mul(secp256k1_int128 *r, int64_t a, int64_t b) { + int64_t hi; + uint64_t lo = (uint64_t)secp256k1_mul128(a, b, &hi); + hi += r->lo < lo; + /* Verify no overflow. + * If r represents a positive value (the sign bit is not set) and the value we are subtracting is a negative value (the sign bit is set), + * then we require that the resulting value also be positive (the sign bit is not set). + */ + VERIFY_CHECK((r->hi <= 0x7fffffffffffffffu && (uint64_t)hi > 0x7fffffffffffffffu) <= (r->hi - (uint64_t)hi <= 0x7fffffffffffffffu)); + /* Verify no underflow. + * If r represents a negative value (the sign bit is set) and the value we are subtracting is a positive value (the sign sign bit is not set), + * then we require that the resulting value also be negative (the sign bit is set). + */ + VERIFY_CHECK((r->hi > 0x7fffffffffffffffu && (uint64_t)hi <= 0x7fffffffffffffffu) <= (r->hi - (uint64_t)hi > 0x7fffffffffffffffu)); + r->hi -= hi; + r->lo -= lo; +} + +static SECP256K1_INLINE void secp256k1_i128_det(secp256k1_int128 *r, int64_t a, int64_t b, int64_t c, int64_t d) { + secp256k1_i128_mul(r, a, d); + secp256k1_i128_dissip_mul(r, b, c); +} + +/* Signed (arithmetic) right shift. + * Non-constant time in n. + */ +static SECP256K1_INLINE void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int n) { + VERIFY_CHECK(n < 128); + if (n >= 64) { + r->lo = (uint64_t)((int64_t)(r->hi) >> (n-64)); + r->hi = (uint64_t)((int64_t)(r->hi) >> 63); + } else if (n > 0) { + r->lo = ((1U * r->hi) << (64-n)) | r->lo >> n; + r->hi = (uint64_t)((int64_t)(r->hi) >> n); + } +} + +static SECP256K1_INLINE int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a) { + return (int64_t)a->lo; +} + +static SECP256K1_INLINE void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a) { + r->hi = (uint64_t)(a >> 63); + r->lo = (uint64_t)a; +} + +static SECP256K1_INLINE int secp256k1_i128_eq_var(const secp256k1_int128 *a, const secp256k1_int128 *b) { + return a->hi == b->hi && a->lo == b->lo; +} + +static SECP256K1_INLINE int secp256k1_i128_check_pow2(const secp256k1_int128 *r, unsigned int n) { + VERIFY_CHECK(n < 127); + return n >= 64 ? r->hi == (uint64_t)1 << (n - 64) && r->lo == 0 + : r->hi == 0 && r->lo == (uint64_t)1 << n; +} + +#endif diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h index 905ef47be..f17e05023 100644 --- a/src/modinv64_impl.h +++ b/src/modinv64_impl.h @@ -7,10 +7,9 @@ #ifndef SECP256K1_MODINV64_IMPL_H #define SECP256K1_MODINV64_IMPL_H +#include "int128.h" #include "modinv64.h" -#include "util.h" - /* This file implements modular inversion based on the paper "Fast constant-time gcd computation and * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang. * @@ -18,6 +17,15 @@ * implementation for N=62, using 62-bit signed limbs represented as int64_t. */ +/* Data type for transition matrices (see section 3 of explanation). + * + * t = [ u v ] + * [ q r ] + */ +typedef struct { + int64_t u, v, q, r; +} secp256k1_modinv64_trans2x2; + #ifdef VERIFY /* Helper function to compute the absolute value of an int64_t. * (we don't use abs/labs/llabs as it depends on the int sizes). */ @@ -32,15 +40,17 @@ static const secp256k1_modinv64_signed62 SECP256K1_SIGNED62_ONE = {{1}}; /* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^62). */ static void secp256k1_modinv64_mul_62(secp256k1_modinv64_signed62 *r, const secp256k1_modinv64_signed62 *a, int alen, int64_t factor) { const int64_t M62 = (int64_t)(UINT64_MAX >> 2); - int128_t c = 0; + secp256k1_int128 c, d; int i; + secp256k1_i128_from_i64(&c, 0); for (i = 0; i < 4; ++i) { - if (i < alen) c += (int128_t)a->v[i] * factor; - r->v[i] = (int64_t)c & M62; c >>= 62; + if (i < alen) secp256k1_i128_accum_mul(&c, a->v[i], factor); + r->v[i] = secp256k1_i128_to_i64(&c) & M62; secp256k1_i128_rshift(&c, 62); } - if (4 < alen) c += (int128_t)a->v[4] * factor; - VERIFY_CHECK(c == (int64_t)c); - r->v[4] = (int64_t)c; + if (4 < alen) secp256k1_i128_accum_mul(&c, a->v[4], factor); + secp256k1_i128_from_i64(&d, secp256k1_i128_to_i64(&c)); + VERIFY_CHECK(secp256k1_i128_eq_var(&c, &d)); + r->v[4] = secp256k1_i128_to_i64(&c); } /* Return -1 for ab*factor. A has alen limbs; b has 5. */ @@ -60,6 +70,13 @@ static int secp256k1_modinv64_mul_cmp_62(const secp256k1_modinv64_signed62 *a, i } return 0; } + +/* Check if the determinant of t is equal to 1 << n. */ +static int secp256k1_modinv64_det_check_pow2(const secp256k1_modinv64_trans2x2 *t, unsigned int n) { + secp256k1_int128 a; + secp256k1_i128_det(&a, t->u, t->v, t->q, t->r); + return secp256k1_i128_check_pow2(&a, n); +} #endif /* Take as input a signed62 number in range (-2*modulus,modulus), and add a multiple of the modulus @@ -136,15 +153,6 @@ static void secp256k1_modinv64_normalize_62(secp256k1_modinv64_signed62 *r, int6 #endif } -/* Data type for transition matrices (see section 3 of explanation). - * - * t = [ u v ] - * [ q r ] - */ -typedef struct { - int64_t u, v, q, r; -} secp256k1_modinv64_trans2x2; - /* Compute the transition matrix and eta for 59 divsteps (where zeta=-(delta+1/2)). * Note that the transformation matrix is scaled by 2^62 and not 2^59. * @@ -206,13 +214,15 @@ static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_ t->v = (int64_t)v; t->q = (int64_t)q; t->r = (int64_t)r; +#ifdef VERIFY /* The determinant of t must be a power of two. This guarantees that multiplication with t * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which * will be divided out again). As each divstep's individual matrix has determinant 2, the * aggregate of 59 of them will have determinant 2^59. Multiplying with the initial * 8*identity (which has determinant 2^6) means the overall outputs has determinant * 2^65. */ - VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 65); + VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 65)); +#endif return zeta; } @@ -289,11 +299,13 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint t->v = (int64_t)v; t->q = (int64_t)q; t->r = (int64_t)r; +#ifdef VERIFY /* The determinant of t must be a power of two. This guarantees that multiplication with t * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which * will be divided out again). As each divstep's individual matrix has determinant 2, the * aggregate of 62 of them will have determinant 2^62. */ - VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 62); + VERIFY_CHECK(secp256k1_modinv64_det_check_pow2(t, 62)); +#endif return eta; } @@ -310,7 +322,7 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp const int64_t e0 = e->v[0], e1 = e->v[1], e2 = e->v[2], e3 = e->v[3], e4 = e->v[4]; const int64_t u = t->u, v = t->v, q = t->q, r = t->r; int64_t md, me, sd, se; - int128_t cd, ce; + secp256k1_int128 cd, ce; #ifdef VERIFY VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0); /* d < modulus */ @@ -327,54 +339,64 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp md = (u & sd) + (v & se); me = (q & sd) + (r & se); /* Begin computing t*[d,e]. */ - cd = (int128_t)u * d0 + (int128_t)v * e0; - ce = (int128_t)q * d0 + (int128_t)r * e0; + secp256k1_i128_mul(&cd, u, d0); + secp256k1_i128_accum_mul(&cd, v, e0); + secp256k1_i128_mul(&ce, q, d0); + secp256k1_i128_accum_mul(&ce, r, e0); /* Correct md,me so that t*[d,e]+modulus*[md,me] has 62 zero bottom bits. */ - md -= (modinfo->modulus_inv62 * (uint64_t)cd + md) & M62; - me -= (modinfo->modulus_inv62 * (uint64_t)ce + me) & M62; + md -= (modinfo->modulus_inv62 * (uint64_t)secp256k1_i128_to_i64(&cd) + md) & M62; + me -= (modinfo->modulus_inv62 * (uint64_t)secp256k1_i128_to_i64(&ce) + me) & M62; /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */ - cd += (int128_t)modinfo->modulus.v[0] * md; - ce += (int128_t)modinfo->modulus.v[0] * me; + secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[0], md); + secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[0], me); /* Verify that the low 62 bits of the computation are indeed zero, and then throw them away. */ - VERIFY_CHECK(((int64_t)cd & M62) == 0); cd >>= 62; - VERIFY_CHECK(((int64_t)ce & M62) == 0); ce >>= 62; + VERIFY_CHECK((secp256k1_i128_to_i64(&cd) & M62) == 0); secp256k1_i128_rshift(&cd, 62); + VERIFY_CHECK((secp256k1_i128_to_i64(&ce) & M62) == 0); secp256k1_i128_rshift(&ce, 62); /* Compute limb 1 of t*[d,e]+modulus*[md,me], and store it as output limb 0 (= down shift). */ - cd += (int128_t)u * d1 + (int128_t)v * e1; - ce += (int128_t)q * d1 + (int128_t)r * e1; + secp256k1_i128_accum_mul(&cd, u, d1); + secp256k1_i128_accum_mul(&cd, v, e1); + secp256k1_i128_accum_mul(&ce, q, d1); + secp256k1_i128_accum_mul(&ce, r, e1); if (modinfo->modulus.v[1]) { /* Optimize for the case where limb of modulus is zero. */ - cd += (int128_t)modinfo->modulus.v[1] * md; - ce += (int128_t)modinfo->modulus.v[1] * me; + secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[1], md); + secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[1], me); } - d->v[0] = (int64_t)cd & M62; cd >>= 62; - e->v[0] = (int64_t)ce & M62; ce >>= 62; + d->v[0] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62); + e->v[0] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62); /* Compute limb 2 of t*[d,e]+modulus*[md,me], and store it as output limb 1. */ - cd += (int128_t)u * d2 + (int128_t)v * e2; - ce += (int128_t)q * d2 + (int128_t)r * e2; + secp256k1_i128_accum_mul(&cd, u, d2); + secp256k1_i128_accum_mul(&cd, v, e2); + secp256k1_i128_accum_mul(&ce, q, d2); + secp256k1_i128_accum_mul(&ce, r, e2); if (modinfo->modulus.v[2]) { /* Optimize for the case where limb of modulus is zero. */ - cd += (int128_t)modinfo->modulus.v[2] * md; - ce += (int128_t)modinfo->modulus.v[2] * me; + secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[2], md); + secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[2], me); } - d->v[1] = (int64_t)cd & M62; cd >>= 62; - e->v[1] = (int64_t)ce & M62; ce >>= 62; + d->v[1] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62); + e->v[1] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62); /* Compute limb 3 of t*[d,e]+modulus*[md,me], and store it as output limb 2. */ - cd += (int128_t)u * d3 + (int128_t)v * e3; - ce += (int128_t)q * d3 + (int128_t)r * e3; + secp256k1_i128_accum_mul(&cd, u, d3); + secp256k1_i128_accum_mul(&cd, v, e3); + secp256k1_i128_accum_mul(&ce, q, d3); + secp256k1_i128_accum_mul(&ce, r, e3); if (modinfo->modulus.v[3]) { /* Optimize for the case where limb of modulus is zero. */ - cd += (int128_t)modinfo->modulus.v[3] * md; - ce += (int128_t)modinfo->modulus.v[3] * me; + secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[3], md); + secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[3], me); } - d->v[2] = (int64_t)cd & M62; cd >>= 62; - e->v[2] = (int64_t)ce & M62; ce >>= 62; + d->v[2] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62); + e->v[2] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62); /* Compute limb 4 of t*[d,e]+modulus*[md,me], and store it as output limb 3. */ - cd += (int128_t)u * d4 + (int128_t)v * e4; - ce += (int128_t)q * d4 + (int128_t)r * e4; - cd += (int128_t)modinfo->modulus.v[4] * md; - ce += (int128_t)modinfo->modulus.v[4] * me; - d->v[3] = (int64_t)cd & M62; cd >>= 62; - e->v[3] = (int64_t)ce & M62; ce >>= 62; + secp256k1_i128_accum_mul(&cd, u, d4); + secp256k1_i128_accum_mul(&cd, v, e4); + secp256k1_i128_accum_mul(&ce, q, d4); + secp256k1_i128_accum_mul(&ce, r, e4); + secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[4], md); + secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[4], me); + d->v[3] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62); + e->v[3] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62); /* What remains is limb 5 of t*[d,e]+modulus*[md,me]; store it as output limb 4. */ - d->v[4] = (int64_t)cd; - e->v[4] = (int64_t)ce; + d->v[4] = secp256k1_i128_to_i64(&cd); + e->v[4] = secp256k1_i128_to_i64(&ce); #ifdef VERIFY VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */ VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0); /* d < modulus */ @@ -392,36 +414,46 @@ static void secp256k1_modinv64_update_fg_62(secp256k1_modinv64_signed62 *f, secp const int64_t f0 = f->v[0], f1 = f->v[1], f2 = f->v[2], f3 = f->v[3], f4 = f->v[4]; const int64_t g0 = g->v[0], g1 = g->v[1], g2 = g->v[2], g3 = g->v[3], g4 = g->v[4]; const int64_t u = t->u, v = t->v, q = t->q, r = t->r; - int128_t cf, cg; + secp256k1_int128 cf, cg; /* Start computing t*[f,g]. */ - cf = (int128_t)u * f0 + (int128_t)v * g0; - cg = (int128_t)q * f0 + (int128_t)r * g0; + secp256k1_i128_mul(&cf, u, f0); + secp256k1_i128_accum_mul(&cf, v, g0); + secp256k1_i128_mul(&cg, q, f0); + secp256k1_i128_accum_mul(&cg, r, g0); /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */ - VERIFY_CHECK(((int64_t)cf & M62) == 0); cf >>= 62; - VERIFY_CHECK(((int64_t)cg & M62) == 0); cg >>= 62; + VERIFY_CHECK((secp256k1_i128_to_i64(&cf) & M62) == 0); secp256k1_i128_rshift(&cf, 62); + VERIFY_CHECK((secp256k1_i128_to_i64(&cg) & M62) == 0); secp256k1_i128_rshift(&cg, 62); /* Compute limb 1 of t*[f,g], and store it as output limb 0 (= down shift). */ - cf += (int128_t)u * f1 + (int128_t)v * g1; - cg += (int128_t)q * f1 + (int128_t)r * g1; - f->v[0] = (int64_t)cf & M62; cf >>= 62; - g->v[0] = (int64_t)cg & M62; cg >>= 62; + secp256k1_i128_accum_mul(&cf, u, f1); + secp256k1_i128_accum_mul(&cf, v, g1); + secp256k1_i128_accum_mul(&cg, q, f1); + secp256k1_i128_accum_mul(&cg, r, g1); + f->v[0] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62); + g->v[0] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62); /* Compute limb 2 of t*[f,g], and store it as output limb 1. */ - cf += (int128_t)u * f2 + (int128_t)v * g2; - cg += (int128_t)q * f2 + (int128_t)r * g2; - f->v[1] = (int64_t)cf & M62; cf >>= 62; - g->v[1] = (int64_t)cg & M62; cg >>= 62; + secp256k1_i128_accum_mul(&cf, u, f2); + secp256k1_i128_accum_mul(&cf, v, g2); + secp256k1_i128_accum_mul(&cg, q, f2); + secp256k1_i128_accum_mul(&cg, r, g2); + f->v[1] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62); + g->v[1] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62); /* Compute limb 3 of t*[f,g], and store it as output limb 2. */ - cf += (int128_t)u * f3 + (int128_t)v * g3; - cg += (int128_t)q * f3 + (int128_t)r * g3; - f->v[2] = (int64_t)cf & M62; cf >>= 62; - g->v[2] = (int64_t)cg & M62; cg >>= 62; + secp256k1_i128_accum_mul(&cf, u, f3); + secp256k1_i128_accum_mul(&cf, v, g3); + secp256k1_i128_accum_mul(&cg, q, f3); + secp256k1_i128_accum_mul(&cg, r, g3); + f->v[2] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62); + g->v[2] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62); /* Compute limb 4 of t*[f,g], and store it as output limb 3. */ - cf += (int128_t)u * f4 + (int128_t)v * g4; - cg += (int128_t)q * f4 + (int128_t)r * g4; - f->v[3] = (int64_t)cf & M62; cf >>= 62; - g->v[3] = (int64_t)cg & M62; cg >>= 62; + secp256k1_i128_accum_mul(&cf, u, f4); + secp256k1_i128_accum_mul(&cf, v, g4); + secp256k1_i128_accum_mul(&cg, q, f4); + secp256k1_i128_accum_mul(&cg, r, g4); + f->v[3] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62); + g->v[3] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62); /* What remains is limb 5 of t*[f,g]; store it as output limb 4. */ - f->v[4] = (int64_t)cf; - g->v[4] = (int64_t)cg; + f->v[4] = secp256k1_i128_to_i64(&cf); + g->v[4] = secp256k1_i128_to_i64(&cg); } /* Compute (t/2^62) * [f, g], where t is a transition matrix for 62 divsteps. @@ -434,30 +466,34 @@ static void secp256k1_modinv64_update_fg_62_var(int len, secp256k1_modinv64_sign const int64_t M62 = (int64_t)(UINT64_MAX >> 2); const int64_t u = t->u, v = t->v, q = t->q, r = t->r; int64_t fi, gi; - int128_t cf, cg; + secp256k1_int128 cf, cg; int i; VERIFY_CHECK(len > 0); /* Start computing t*[f,g]. */ fi = f->v[0]; gi = g->v[0]; - cf = (int128_t)u * fi + (int128_t)v * gi; - cg = (int128_t)q * fi + (int128_t)r * gi; + secp256k1_i128_mul(&cf, u, fi); + secp256k1_i128_accum_mul(&cf, v, gi); + secp256k1_i128_mul(&cg, q, fi); + secp256k1_i128_accum_mul(&cg, r, gi); /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */ - VERIFY_CHECK(((int64_t)cf & M62) == 0); cf >>= 62; - VERIFY_CHECK(((int64_t)cg & M62) == 0); cg >>= 62; + VERIFY_CHECK((secp256k1_i128_to_i64(&cf) & M62) == 0); secp256k1_i128_rshift(&cf, 62); + VERIFY_CHECK((secp256k1_i128_to_i64(&cg) & M62) == 0); secp256k1_i128_rshift(&cg, 62); /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting * down by 62 bits). */ for (i = 1; i < len; ++i) { fi = f->v[i]; gi = g->v[i]; - cf += (int128_t)u * fi + (int128_t)v * gi; - cg += (int128_t)q * fi + (int128_t)r * gi; - f->v[i - 1] = (int64_t)cf & M62; cf >>= 62; - g->v[i - 1] = (int64_t)cg & M62; cg >>= 62; + secp256k1_i128_accum_mul(&cf, u, fi); + secp256k1_i128_accum_mul(&cf, v, gi); + secp256k1_i128_accum_mul(&cg, q, fi); + secp256k1_i128_accum_mul(&cg, r, gi); + f->v[i - 1] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62); + g->v[i - 1] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62); } /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */ - f->v[len - 1] = (int64_t)cf; - g->v[len - 1] = (int64_t)cg; + f->v[len - 1] = secp256k1_i128_to_i64(&cf); + g->v[len - 1] = secp256k1_i128_to_i64(&cg); } /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */ diff --git a/src/precompute_ecmult.c b/src/precompute_ecmult.c index 5ccbcb3c5..2aa37b8fe 100644 --- a/src/precompute_ecmult.c +++ b/src/precompute_ecmult.c @@ -14,10 +14,13 @@ #endif #include "../include/secp256k1.h" + #include "assumptions.h" #include "util.h" + #include "field_impl.h" #include "group_impl.h" +#include "int128_impl.h" #include "ecmult.h" #include "ecmult_compute_table_impl.h" diff --git a/src/precompute_ecmult_gen.c b/src/precompute_ecmult_gen.c index 7c6359c40..a4ec8e0dc 100644 --- a/src/precompute_ecmult_gen.c +++ b/src/precompute_ecmult_gen.c @@ -8,9 +8,12 @@ #include #include "../include/secp256k1.h" + #include "assumptions.h" #include "util.h" + #include "group.h" +#include "int128_impl.h" #include "ecmult_gen.h" #include "ecmult_gen_compute_table_impl.h" diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h index 426c41f1a..247f9e5ae 100644 --- a/src/scalar_4x64_impl.h +++ b/src/scalar_4x64_impl.h @@ -7,9 +7,8 @@ #ifndef SECP256K1_SCALAR_REPR_IMPL_H #define SECP256K1_SCALAR_REPR_IMPL_H -#include "scalar.h" #include - +#include "int128.h" #include "modinv64_impl.h" /* Limbs of the secp256k1 order. */ @@ -79,51 +78,62 @@ SECP256K1_INLINE static int secp256k1_scalar_check_overflow(const secp256k1_scal } SECP256K1_INLINE static int secp256k1_scalar_reduce(secp256k1_scalar *r, unsigned int overflow) { - uint128_t t; + secp256k1_uint128 t; VERIFY_CHECK(overflow <= 1); - t = (uint128_t)r->d[0] + overflow * SECP256K1_N_C_0; - r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; - t += (uint128_t)r->d[1] + overflow * SECP256K1_N_C_1; - r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; - t += (uint128_t)r->d[2] + overflow * SECP256K1_N_C_2; - r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; - t += (uint64_t)r->d[3]; - r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; + secp256k1_u128_from_u64(&t, r->d[0]); + secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_0); + r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, r->d[1]); + secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_1); + r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, r->d[2]); + secp256k1_u128_accum_u64(&t, overflow * SECP256K1_N_C_2); + r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, r->d[3]); + r->d[3] = secp256k1_u128_to_u64(&t); return overflow; } static int secp256k1_scalar_add(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b) { int overflow; - uint128_t t = (uint128_t)a->d[0] + b->d[0]; - r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; - t += (uint128_t)a->d[1] + b->d[1]; - r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; - t += (uint128_t)a->d[2] + b->d[2]; - r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; - t += (uint128_t)a->d[3] + b->d[3]; - r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; - overflow = t + secp256k1_scalar_check_overflow(r); + secp256k1_uint128 t; + secp256k1_u128_from_u64(&t, a->d[0]); + secp256k1_u128_accum_u64(&t, b->d[0]); + r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, a->d[1]); + secp256k1_u128_accum_u64(&t, b->d[1]); + r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, a->d[2]); + secp256k1_u128_accum_u64(&t, b->d[2]); + r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, a->d[3]); + secp256k1_u128_accum_u64(&t, b->d[3]); + r->d[3] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64); + overflow = secp256k1_u128_to_u64(&t) + secp256k1_scalar_check_overflow(r); VERIFY_CHECK(overflow == 0 || overflow == 1); secp256k1_scalar_reduce(r, overflow); return overflow; } static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int flag) { - uint128_t t; + secp256k1_uint128 t; volatile int vflag = flag; VERIFY_CHECK(bit < 256); bit += ((uint32_t) vflag - 1) & 0x100; /* forcing (bit >> 6) > 3 makes this a noop */ - t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F)); - r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; - t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F)); - r->d[1] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; - t += (uint128_t)r->d[2] + (((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F)); - r->d[2] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64; - t += (uint128_t)r->d[3] + (((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F)); - r->d[3] = t & 0xFFFFFFFFFFFFFFFFULL; + secp256k1_u128_from_u64(&t, r->d[0]); + secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F)); + r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, r->d[1]); + secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F)); + r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, r->d[2]); + secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F)); + r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, r->d[3]); + secp256k1_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F)); + r->d[3] = secp256k1_u128_to_u64(&t); #ifdef VERIFY - VERIFY_CHECK((t >> 64) == 0); - VERIFY_CHECK(secp256k1_scalar_check_overflow(r) == 0); + VERIFY_CHECK(secp256k1_u128_hi_u64(&t) == 0); #endif } @@ -152,14 +162,19 @@ SECP256K1_INLINE static int secp256k1_scalar_is_zero(const secp256k1_scalar *a) static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a) { uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (secp256k1_scalar_is_zero(a) == 0); - uint128_t t = (uint128_t)(~a->d[0]) + SECP256K1_N_0 + 1; - r->d[0] = t & nonzero; t >>= 64; - t += (uint128_t)(~a->d[1]) + SECP256K1_N_1; - r->d[1] = t & nonzero; t >>= 64; - t += (uint128_t)(~a->d[2]) + SECP256K1_N_2; - r->d[2] = t & nonzero; t >>= 64; - t += (uint128_t)(~a->d[3]) + SECP256K1_N_3; - r->d[3] = t & nonzero; + secp256k1_uint128 t; + secp256k1_u128_from_u64(&t, ~a->d[0]); + secp256k1_u128_accum_u64(&t, SECP256K1_N_0 + 1); + r->d[0] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, ~a->d[1]); + secp256k1_u128_accum_u64(&t, SECP256K1_N_1); + r->d[1] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, ~a->d[2]); + secp256k1_u128_accum_u64(&t, SECP256K1_N_2); + r->d[2] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, ~a->d[3]); + secp256k1_u128_accum_u64(&t, SECP256K1_N_3); + r->d[3] = secp256k1_u128_to_u64(&t) & nonzero; } SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) { @@ -184,14 +199,19 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) { volatile int vflag = flag; uint64_t mask = -vflag; uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1; - uint128_t t = (uint128_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask); - r->d[0] = t & nonzero; t >>= 64; - t += (uint128_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask); - r->d[1] = t & nonzero; t >>= 64; - t += (uint128_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask); - r->d[2] = t & nonzero; t >>= 64; - t += (uint128_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask); - r->d[3] = t & nonzero; + secp256k1_uint128 t; + secp256k1_u128_from_u64(&t, r->d[0] ^ mask); + secp256k1_u128_accum_u64(&t, (SECP256K1_N_0 + 1) & mask); + r->d[0] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, r->d[1] ^ mask); + secp256k1_u128_accum_u64(&t, SECP256K1_N_1 & mask); + r->d[1] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, r->d[2] ^ mask); + secp256k1_u128_accum_u64(&t, SECP256K1_N_2 & mask); + r->d[2] = secp256k1_u128_to_u64(&t) & nonzero; secp256k1_u128_rshift(&t, 64); + secp256k1_u128_accum_u64(&t, r->d[3] ^ mask); + secp256k1_u128_accum_u64(&t, SECP256K1_N_3 & mask); + r->d[3] = secp256k1_u128_to_u64(&t) & nonzero; return 2 * (mask == 0) - 1; } @@ -201,9 +221,10 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) { #define muladd(a,b) { \ uint64_t tl, th; \ { \ - uint128_t t = (uint128_t)a * b; \ - th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \ - tl = t; \ + secp256k1_uint128 t; \ + secp256k1_u128_mul(&t, a, b); \ + th = secp256k1_u128_hi_u64(&t); /* at most 0xFFFFFFFFFFFFFFFE */ \ + tl = secp256k1_u128_to_u64(&t); \ } \ c0 += tl; /* overflow is handled on the next line */ \ th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \ @@ -216,9 +237,10 @@ static int secp256k1_scalar_cond_negate(secp256k1_scalar *r, int flag) { #define muladd_fast(a,b) { \ uint64_t tl, th; \ { \ - uint128_t t = (uint128_t)a * b; \ - th = t >> 64; /* at most 0xFFFFFFFFFFFFFFFE */ \ - tl = t; \ + secp256k1_uint128 t; \ + secp256k1_u128_mul(&t, a, b); \ + th = secp256k1_u128_hi_u64(&t); /* at most 0xFFFFFFFFFFFFFFFE */ \ + tl = secp256k1_u128_to_u64(&t); \ } \ c0 += tl; /* overflow is handled on the next line */ \ th += (c0 < tl); /* at most 0xFFFFFFFFFFFFFFFF */ \ @@ -518,8 +540,8 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l) : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1) : "rax", "rdx", "r8", "r9", "r10", "cc", "memory"); #else - uint128_t c; - uint64_t c0, c1, c2; + secp256k1_uint128 c128; + uint64_t c, c0, c1, c2; uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7]; uint64_t m0, m1, m2, m3, m4, m5; uint32_t m6; @@ -576,14 +598,18 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l) /* Reduce 258 bits into 256. */ /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */ - c = p0 + (uint128_t)SECP256K1_N_C_0 * p4; - r->d[0] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; - c += p1 + (uint128_t)SECP256K1_N_C_1 * p4; - r->d[1] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; - c += p2 + (uint128_t)p4; - r->d[2] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; - c += p3; - r->d[3] = c & 0xFFFFFFFFFFFFFFFFULL; c >>= 64; + secp256k1_u128_from_u64(&c128, p0); + secp256k1_u128_accum_mul(&c128, SECP256K1_N_C_0, p4); + r->d[0] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64); + secp256k1_u128_accum_u64(&c128, p1); + secp256k1_u128_accum_mul(&c128, SECP256K1_N_C_1, p4); + r->d[1] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64); + secp256k1_u128_accum_u64(&c128, p2); + secp256k1_u128_accum_u64(&c128, p4); + r->d[2] = secp256k1_u128_to_u64(&c128); secp256k1_u128_rshift(&c128, 64); + secp256k1_u128_accum_u64(&c128, p3); + r->d[3] = secp256k1_u128_to_u64(&c128); + c = secp256k1_u128_hi_u64(&c128); #endif /* Final reduction of r. */ diff --git a/src/secp256k1.c b/src/secp256k1.c index bbe9567b3..5ba236b1c 100644 --- a/src/secp256k1.c +++ b/src/secp256k1.c @@ -22,6 +22,7 @@ #include "assumptions.h" #include "util.h" + #include "field_impl.h" #include "scalar_impl.h" #include "group_impl.h" @@ -32,6 +33,7 @@ #include "ecdsa_impl.h" #include "eckey_impl.h" #include "hash_impl.h" +#include "int128_impl.h" #include "scratch_impl.h" #include "selftest.h" diff --git a/src/tests.c b/src/tests.c index 9b7040f50..e1438832b 100644 --- a/src/tests.c +++ b/src/tests.c @@ -26,6 +26,7 @@ #include "modinv32_impl.h" #ifdef SECP256K1_WIDEMUL_INT128 #include "modinv64_impl.h" +#include "int128_impl.h" #endif #define CONDITIONAL_TEST(cnt, nam) if (count < (cnt)) { printf("Skipping %s (iteration count too low)\n", nam); } else @@ -476,6 +477,7 @@ void run_scratch_tests(void) { secp256k1_context_destroy(none); } + void run_ctz_tests(void) { static const uint32_t b32[] = {1, 0xffffffff, 0x5e56968f, 0xe0d63129}; static const uint64_t b64[] = {1, 0xffffffffffffffff, 0xbcd02462139b3fc3, 0x98b5f80c769693ef}; @@ -860,7 +862,8 @@ uint64_t modinv2p64(uint64_t x) { return w; } -/* compute out = (a*b) mod m; if b=NULL, treat b=1. + +/* compute out = (a*b) mod m; if b=NULL, treat b=1; if m=NULL, treat m=infinity. * * Out is a 512-bit number (represented as 32 uint16_t's in LE order). The other * arguments are 256-bit numbers (represented as 16 uint16_t's in LE order). */ @@ -902,46 +905,48 @@ void mulmod256(uint16_t* out, const uint16_t* a, const uint16_t* b, const uint16 } } - /* Compute the highest set bit in m. */ - for (i = 255; i >= 0; --i) { - if ((m[i >> 4] >> (i & 15)) & 1) { - m_bitlen = i; - break; + if (m) { + /* Compute the highest set bit in m. */ + for (i = 255; i >= 0; --i) { + if ((m[i >> 4] >> (i & 15)) & 1) { + m_bitlen = i; + break; + } } - } - /* Try do mul -= m<= 0; --i) { - uint16_t mul2[32]; - int64_t cs; - - /* Compute mul2 = mul - m<= 0 && bitpos < 256) { - sub |= ((m[bitpos >> 4] >> (bitpos & 15)) & 1) << p; + /* Try do mul -= m<= 0; --i) { + uint16_t mul2[32]; + int64_t cs; + + /* Compute mul2 = mul - m<= 0 && bitpos < 256) { + sub |= ((m[bitpos >> 4] >> (bitpos & 15)) & 1) << p; + } } + /* Add mul[j]-sub to accumulator, and shift bottom 16 bits out to mul2[j]. */ + cs += mul[j]; + cs -= sub; + mul2[j] = (cs & 0xFFFF); + cs >>= 16; + } + /* If remainder of subtraction is 0, set mul = mul2. */ + if (cs == 0) { + memcpy(mul, mul2, sizeof(mul)); } - /* Add mul[j]-sub to accumulator, and shift bottom 16 bits out to mul2[j]. */ - cs += mul[j]; - cs -= sub; - mul2[j] = (cs & 0xFFFF); - cs >>= 16; } - /* If remainder of subtraction is 0, set mul = mul2. */ - if (cs == 0) { - memcpy(mul, mul2, sizeof(mul)); + /* Sanity check: test that all limbs higher than m's highest are zero */ + for (i = (m_bitlen >> 4) + 1; i < 32; ++i) { + CHECK(mul[i] == 0); } } - /* Sanity check: test that all limbs higher than m's highest are zero */ - for (i = (m_bitlen >> 4) + 1; i < 32; ++i) { - CHECK(mul[i] == 0); - } memcpy(out, mul, 32); } @@ -1756,8 +1761,305 @@ void run_modinv_tests(void) { } } -/***** SCALAR TESTS *****/ +/***** INT128 TESTS *****/ + +#ifdef SECP256K1_WIDEMUL_INT128 +/* Add two 256-bit numbers (represented as 16 uint16_t's in LE order) together mod 2^256. */ +void add256(uint16_t* out, const uint16_t* a, const uint16_t* b) { + int i; + uint32_t carry = 0; + for (i = 0; i < 16; ++i) { + carry += a[i]; + carry += b[i]; + out[i] = carry; + carry >>= 16; + } +} + +/* Negate a 256-bit number (represented as 16 uint16_t's in LE order) mod 2^256. */ +void neg256(uint16_t* out, const uint16_t* a) { + int i; + uint32_t carry = 1; + for (i = 0; i < 16; ++i) { + carry += (uint16_t)~a[i]; + out[i] = carry; + carry >>= 16; + } +} + +/* Right-shift a 256-bit number (represented as 16 uint16_t's in LE order). */ +void rshift256(uint16_t* out, const uint16_t* a, int n, int sign_extend) { + uint16_t sign = sign_extend && (a[15] >> 15); + int i, j; + for (i = 15; i >= 0; --i) { + uint16_t v = 0; + for (j = 0; j < 16; ++j) { + int frompos = i*16 + j + n; + if (frompos >= 256) { + v |= sign << j; + } else { + v |= ((uint16_t)((a[frompos >> 4] >> (frompos & 15)) & 1)) << j; + } + } + out[i] = v; + } +} + +/* Load a 64-bit unsigned integer into an array of 16 uint16_t's in LE order representing a 256-bit value. */ +void load256u64(uint16_t* out, uint64_t v, int is_signed) { + int i; + uint64_t sign = is_signed && (v >> 63) ? UINT64_MAX : 0; + for (i = 0; i < 4; ++i) { + out[i] = v >> (16 * i); + } + for (i = 4; i < 16; ++i) { + out[i] = sign; + } +} +/* Load a 128-bit unsigned integer into an array of 16 uint16_t's in LE order representing a 256-bit value. */ +void load256two64(uint16_t* out, uint64_t hi, uint64_t lo, int is_signed) { + int i; + uint64_t sign = is_signed && (hi >> 63) ? UINT64_MAX : 0; + for (i = 0; i < 4; ++i) { + out[i] = lo >> (16 * i); + } + for (i = 4; i < 8; ++i) { + out[i] = hi >> (16 * (i - 4)); + } + for (i = 8; i < 16; ++i) { + out[i] = sign; + } +} + +/* Check whether the 256-bit value represented by array of 16-bit values is in range -2^127 < v < 2^127. */ +int int256is127(const uint16_t* v) { + int all_0 = ((v[7] & 0x8000) == 0), all_1 = ((v[7] & 0x8000) == 0x8000); + int i; + for (i = 8; i < 16; ++i) { + if (v[i] != 0) all_0 = 0; + if (v[i] != 0xffff) all_1 = 0; + } + return all_0 || all_1; +} + +void load256u128(uint16_t* out, const secp256k1_uint128* v) { + uint64_t lo = secp256k1_u128_to_u64(v), hi = secp256k1_u128_hi_u64(v); + load256two64(out, hi, lo, 0); +} + +void load256i128(uint16_t* out, const secp256k1_int128* v) { + uint64_t lo; + int64_t hi; + secp256k1_int128 c = *v; + lo = secp256k1_i128_to_i64(&c); + secp256k1_i128_rshift(&c, 64); + hi = secp256k1_i128_to_i64(&c); + load256two64(out, hi, lo, 1); +} + +void run_int128_test_case(void) { + unsigned char buf[32]; + uint64_t v[4]; + secp256k1_int128 swa, swz; + secp256k1_uint128 uwa, uwz; + uint64_t ub, uc; + int64_t sb, sc; + uint16_t rswa[16], rswz[32], rswr[32], ruwa[16], ruwz[32], ruwr[32]; + uint16_t rub[16], ruc[16], rsb[16], rsc[16]; + int i; + + /* Generate 32-byte random value. */ + secp256k1_testrand256_test(buf); + /* Convert into 4 64-bit integers. */ + for (i = 0; i < 4; ++i) { + uint64_t vi = 0; + int j; + for (j = 0; j < 8; ++j) vi = (vi << 8) + buf[8*i + j]; + v[i] = vi; + } + /* Convert those into a 128-bit value and two 64-bit values (signed and unsigned). */ + secp256k1_u128_load(&uwa, v[1], v[0]); + secp256k1_i128_load(&swa, v[1], v[0]); + ub = v[2]; + sb = v[2]; + uc = v[3]; + sc = v[3]; + /* Load those also into 16-bit array representations. */ + load256u128(ruwa, &uwa); + load256i128(rswa, &swa); + load256u64(rub, ub, 0); + load256u64(rsb, sb, 1); + load256u64(ruc, uc, 0); + load256u64(rsc, sc, 1); + /* test secp256k1_u128_mul */ + mulmod256(ruwr, rub, ruc, NULL); + secp256k1_u128_mul(&uwz, ub, uc); + load256u128(ruwz, &uwz); + CHECK(secp256k1_memcmp_var(ruwr, ruwz, 16) == 0); + /* test secp256k1_u128_accum_mul */ + mulmod256(ruwr, rub, ruc, NULL); + add256(ruwr, ruwr, ruwa); + uwz = uwa; + secp256k1_u128_accum_mul(&uwz, ub, uc); + load256u128(ruwz, &uwz); + CHECK(secp256k1_memcmp_var(ruwr, ruwz, 16) == 0); + /* test secp256k1_u128_accum_u64 */ + add256(ruwr, rub, ruwa); + uwz = uwa; + secp256k1_u128_accum_u64(&uwz, ub); + load256u128(ruwz, &uwz); + CHECK(secp256k1_memcmp_var(ruwr, ruwz, 16) == 0); + /* test secp256k1_u128_rshift */ + rshift256(ruwr, ruwa, uc % 128, 0); + uwz = uwa; + secp256k1_u128_rshift(&uwz, uc % 128); + load256u128(ruwz, &uwz); + CHECK(secp256k1_memcmp_var(ruwr, ruwz, 16) == 0); + /* test secp256k1_u128_to_u64 */ + CHECK(secp256k1_u128_to_u64(&uwa) == v[0]); + /* test secp256k1_u128_hi_u64 */ + CHECK(secp256k1_u128_hi_u64(&uwa) == v[1]); + /* test secp256k1_u128_from_u64 */ + secp256k1_u128_from_u64(&uwz, ub); + load256u128(ruwz, &uwz); + CHECK(secp256k1_memcmp_var(rub, ruwz, 16) == 0); + /* test secp256k1_u128_check_bits */ + { + int uwa_bits = 0; + int j; + for (j = 0; j < 128; ++j) { + if (ruwa[j / 16] >> (j % 16)) uwa_bits = 1 + j; + } + for (j = 0; j < 128; ++j) { + CHECK(secp256k1_u128_check_bits(&uwa, j) == (uwa_bits <= j)); + } + } + /* test secp256k1_i128_mul */ + mulmod256(rswr, rsb, rsc, NULL); + secp256k1_i128_mul(&swz, sb, sc); + load256i128(rswz, &swz); + CHECK(secp256k1_memcmp_var(rswr, rswz, 16) == 0); + /* test secp256k1_i128_accum_mul */ + mulmod256(rswr, rsb, rsc, NULL); + add256(rswr, rswr, rswa); + if (int256is127(rswr)) { + swz = swa; + secp256k1_i128_accum_mul(&swz, sb, sc); + load256i128(rswz, &swz); + CHECK(secp256k1_memcmp_var(rswr, rswz, 16) == 0); + } + /* test secp256k1_i128_det */ + { + uint16_t rsd[16], rse[16], rst[32]; + int64_t sd = v[0], se = v[1]; + load256u64(rsd, sd, 1); + load256u64(rse, se, 1); + mulmod256(rst, rsc, rsd, NULL); + neg256(rst, rst); + mulmod256(rswr, rsb, rse, NULL); + add256(rswr, rswr, rst); + secp256k1_i128_det(&swz, sb, sc, sd, se); + load256i128(rswz, &swz); + CHECK(secp256k1_memcmp_var(rswr, rswz, 16) == 0); + } + /* test secp256k1_i128_rshift */ + rshift256(rswr, rswa, uc % 127, 1); + swz = swa; + secp256k1_i128_rshift(&swz, uc % 127); + load256i128(rswz, &swz); + CHECK(secp256k1_memcmp_var(rswr, rswz, 16) == 0); + /* test secp256k1_i128_to_i64 */ + CHECK((uint64_t)secp256k1_i128_to_i64(&swa) == v[0]); + /* test secp256k1_i128_from_i64 */ + secp256k1_i128_from_i64(&swz, sb); + load256i128(rswz, &swz); + CHECK(secp256k1_memcmp_var(rsb, rswz, 16) == 0); + /* test secp256k1_i128_eq_var */ + { + int expect = (uc & 1); + swz = swa; + if (!expect) { + /* Make sure swz != swa */ + uint64_t v0c = v[0], v1c = v[1]; + if (ub & 64) { + v1c ^= (((uint64_t)1) << (ub & 63)); + } else { + v0c ^= (((uint64_t)1) << (ub & 63)); + } + secp256k1_i128_load(&swz, v1c, v0c); + } + CHECK(secp256k1_i128_eq_var(&swa, &swz) == expect); + } + /* test secp256k1_i128_check_pow2 */ + { + int expect = (uc & 1); + int pos = ub % 127; + if (expect) { + /* If expect==1, set swz to exactly (2 << pos). */ + uint64_t hi = 0; + uint64_t lo = 0; + if (pos & 64) { + hi = (((uint64_t)1) << (pos & 63)); + } else { + lo = (((uint64_t)1) << (pos & 63)); + } + secp256k1_i128_load(&swz, hi, lo); + } else { + /* If expect==0, set swz = swa, but update expect=1 if swa happens to equal (2 << pos). */ + if (pos & 64) { + if ((v[1] == (((uint64_t)1) << (pos & 63))) && v[0] == 0) expect = 1; + } else { + if ((v[0] == (((uint64_t)1) << (pos & 63))) && v[1] == 0) expect = 1; + } + swz = swa; + } + CHECK(secp256k1_i128_check_pow2(&swz, pos) == expect); + } +} + +void run_int128_tests(void) { + { /* secp256k1_u128_accum_mul */ + secp256k1_uint128 res; + + /* Check secp256k1_u128_accum_mul overflow */ + secp256k1_u128_mul(&res, UINT64_MAX, UINT64_MAX); + secp256k1_u128_accum_mul(&res, UINT64_MAX, UINT64_MAX); + CHECK(secp256k1_u128_to_u64(&res) == 2); + CHECK(secp256k1_u128_hi_u64(&res) == 18446744073709551612U); + } + { /* secp256k1_u128_accum_mul */ + secp256k1_int128 res; + + /* Compute INT128_MAX = 2^127 - 1 with secp256k1_i128_accum_mul */ + secp256k1_i128_mul(&res, INT64_MAX, INT64_MAX); + secp256k1_i128_accum_mul(&res, INT64_MAX, INT64_MAX); + CHECK(secp256k1_i128_to_i64(&res) == 2); + secp256k1_i128_accum_mul(&res, 4, 9223372036854775807); + secp256k1_i128_accum_mul(&res, 1, 1); + CHECK((uint64_t)secp256k1_i128_to_i64(&res) == UINT64_MAX); + secp256k1_i128_rshift(&res, 64); + CHECK(secp256k1_i128_to_i64(&res) == INT64_MAX); + + /* Compute INT128_MIN = - 2^127 with secp256k1_i128_accum_mul */ + secp256k1_i128_mul(&res, INT64_MAX, INT64_MIN); + CHECK(secp256k1_i128_to_i64(&res) == INT64_MIN); + secp256k1_i128_accum_mul(&res, INT64_MAX, INT64_MIN); + CHECK(secp256k1_i128_to_i64(&res) == 0); + secp256k1_i128_accum_mul(&res, 2, INT64_MIN); + CHECK(secp256k1_i128_to_i64(&res) == 0); + secp256k1_i128_rshift(&res, 64); + CHECK(secp256k1_i128_to_i64(&res) == INT64_MIN); + } + { + /* Randomized tests. */ + int i; + for (i = 0; i < 256 * count; ++i) run_int128_test_case(); + } +} +#endif + +/***** SCALAR TESTS *****/ void scalar_test(void) { secp256k1_scalar s; @@ -7409,6 +7711,9 @@ int main(int argc, char **argv) { run_rand_int(); run_util_tests(); +#ifdef SECP256K1_WIDEMUL_INT128 + run_int128_tests(); +#endif run_ctz_tests(); run_modinv_tests(); run_inverse_tests(); diff --git a/src/util.h b/src/util.h index ff1592929..dadb3cabe 100644 --- a/src/util.h +++ b/src/util.h @@ -281,28 +281,36 @@ static SECP256K1_INLINE void secp256k1_int_cmov(int *r, const int *a, int flag) *r = (int)(r_masked | a_masked); } -/* If USE_FORCE_WIDEMUL_{INT128,INT64} is set, use that wide multiplication implementation. - * Otherwise use the presence of __SIZEOF_INT128__ to decide. - */ -#if defined(USE_FORCE_WIDEMUL_INT128) +#if defined(USE_FORCE_WIDEMUL_INT128_STRUCT) +/* If USE_FORCE_WIDEMUL_INT128_STRUCT is set, use int128_struct. */ +# define SECP256K1_WIDEMUL_INT128 1 +# define SECP256K1_INT128_STRUCT 1 +#elif defined(USE_FORCE_WIDEMUL_INT128) +/* If USE_FORCE_WIDEMUL_INT128 is set, use int128. */ # define SECP256K1_WIDEMUL_INT128 1 +# define SECP256K1_INT128_NATIVE 1 #elif defined(USE_FORCE_WIDEMUL_INT64) +/* If USE_FORCE_WIDEMUL_INT64 is set, use int64. */ # define SECP256K1_WIDEMUL_INT64 1 #elif defined(UINT128_MAX) || defined(__SIZEOF_INT128__) +/* If a native 128-bit integer type exists, use int128. */ +# define SECP256K1_WIDEMUL_INT128 1 +# define SECP256K1_INT128_NATIVE 1 +#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_ARM64)) +/* On 64-bit MSVC targets (x86_64 and arm64), use int128_struct + * (which has special logic to implement using intrinsics on those systems). */ # define SECP256K1_WIDEMUL_INT128 1 +# define SECP256K1_INT128_STRUCT 1 +#elif SIZE_MAX > 0xffffffff +/* Systems with 64-bit pointers (and thus registers) very likely benefit from + * using 64-bit based arithmetic (even if we need to fall back to 32x32->64 based + * multiplication logic). */ +# define SECP256K1_WIDEMUL_INT128 1 +# define SECP256K1_INT128_STRUCT 1 #else +/* Lastly, fall back to int64 based arithmetic. */ # define SECP256K1_WIDEMUL_INT64 1 #endif -#if defined(SECP256K1_WIDEMUL_INT128) -# if !defined(UINT128_MAX) && defined(__SIZEOF_INT128__) -SECP256K1_GNUC_EXT typedef unsigned __int128 uint128_t; -SECP256K1_GNUC_EXT typedef __int128 int128_t; -#define UINT128_MAX ((uint128_t)(-1)) -#define INT128_MAX ((int128_t)(UINT128_MAX >> 1)) -#define INT128_MIN (-INT128_MAX - 1) -/* No (U)INT128_C macros because compilers providing __int128 do not support 128-bit literals. */ -# endif -#endif #ifndef __has_builtin #define __has_builtin(x) 0