From e97ed4e82f1c9cedc11a50e1d16119c793391442 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 3 Aug 2023 15:58:46 +0000 Subject: [PATCH 01/15] Adopt s2n-bignum's verified scalar bignum functions to aws-lc's montgomery multiplication --- crypto/fipsmodule/CMakeLists.txt | 14 ++ crypto/fipsmodule/bn/montgomery.c | 127 +++++++++++++++++- .../s2n-bignum/include/s2n-bignum_aws-lc.h | 50 +++++++ 3 files changed, 186 insertions(+), 5 deletions(-) diff --git a/crypto/fipsmodule/CMakeLists.txt b/crypto/fipsmodule/CMakeLists.txt index 84b3a3cc37..7fba0092a8 100644 --- a/crypto/fipsmodule/CMakeLists.txt +++ b/crypto/fipsmodule/CMakeLists.txt @@ -229,6 +229,20 @@ if((ARCH STREQUAL "x86_64" OR ARCH STREQUAL "aarch64") AND curve25519/curve25519_x25519base_byte.S curve25519/curve25519_x25519base_byte_alt.S ) + + # Big integer arithmetics using s2n-bignum + list(APPEND S2N_BIGNUM_ASM_SOURCES + fastmul/bignum_kmul_16_32.S + fastmul/bignum_kmul_32_64.S + fastmul/bignum_ksqr_16_32.S + fastmul/bignum_ksqr_32_64.S + fastmul/bignum_emontredc_8n.S + + generic/bignum_ge.S + generic/bignum_mul.S + generic/bignum_optsub.S + generic/bignum_sqr.S + ) endif() endif() diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index 4c54360ef9..61ce11995b 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -119,8 +119,47 @@ #include #include "internal.h" +#include "../cpucap/internal.h" #include "../../internal.h" +#if !defined(OPENSSL_NO_ASM) && \ + (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ + defined(OPENSSL_AARCH64) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX) && \ + defined(OPENSSL_BN_ASM_MONT) + +#include "../../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" + +#define BN_MONTGOMERY_USE_S2N_BIGNUM 1 + +#endif + +OPENSSL_INLINE int montgomery_s2n_bignum_capable(void) { +#if defined(BN_MONTGOMERY_USE_S2N_BIGNUM) + + return 1; + +#else + + return 0; + +#endif +} + +OPENSSL_INLINE int 
montgomery_use_s2n_bignum(unsigned int num) { +#if defined(BN_MONTGOMERY_USE_S2N_BIGNUM) + + // Use s2n-bignum's functions only if (1) the ARM architecture has slow + // multipliers, and (2) temporary buffer's size does not exceed + // BN_MONTGOMERY_MAX_WORDS. + return !CRYPTO_is_ARMv8_wide_multiplier_capable() && (num % 8 == 0) && + BN_BITS2 == 64 && (2 * (uint64_t)num + 96) <= BN_MONTGOMERY_MAX_WORDS; + +#else + + return 0; + +#endif +} BN_MONT_CTX *BN_MONT_CTX_new(void) { BN_MONT_CTX *ret = OPENSSL_malloc(sizeof(BN_MONT_CTX)); @@ -418,6 +457,77 @@ static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a, return ret; } + +#if defined(OPENSSL_BN_ASM_MONT) + +static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, + const BN_ULONG *bp, + const BN_ULONG *np, + const BN_ULONG *n0, size_t num) { + +#if defined(BN_MONTGOMERY_USE_S2N_BIGNUM) + + // t is a temporary buffer used by big-int multiplication. + // bignum_kmul_32_64 requires 96 words at maximum. + uint64_t t[96]; + // l is the output buffer of big-int multiplication. + // Its low num*2 elements are used. + // It is montgomery_use_s2n_bignum() that checks whether num*2 fits in the + // size of mulres array. + uint64_t mulres[BN_MONTGOMERY_MAX_WORDS - 96]; + + // BN_ULONG is uint64_t since BN_BITS2 is 64. + // m is the prime number, and m * w = -1 mod 2^64. + uint64_t *m = (uint64_t *)np; + uint64_t w = (uint64_t)n0[0]; + uint64_t *src = (uint64_t *)ap, *src2 = (uint64_t *)bp; + uint64_t *dest = (uint64_t *)rp; + uint64_t c; + + if (num == 32) { + if (ap == bp) + bignum_ksqr_32_64(mulres, src, t); + else + bignum_kmul_32_64(mulres, src2, src, t); + } else if (num == 16) { + if (ap == bp) + bignum_ksqr_16_32(mulres, src, t); + else + bignum_kmul_16_32(mulres, src2, src, t); + } else { + if (ap == bp) + bignum_sqr(num * 2, mulres, num, src); + else + bignum_mul(num * 2, mulres, num, src2, num, src); + } + + // Do montgomery reduction. 
We follow the definition of montgomery reduction + // which is: + // 1. Calculate (mulres + ((mulres mod R) * (-m^-1 mod R) mod R) * m) / R + // using + // bignum_emontredc_8n, where R is 2^(64*num). + // The calculated result is stored in the upper half elements of the mulres + // buffer. If the result overflows num words, bignum_emontredc_8n + // returns 1. + // 2. Optionally subtract the result if the (result of step 1) >= m. + // The comparison is true if (1) there is an overflow (bignum_emontredc_8n + // returns 1), or (2) the upper half mulres is larger than m. + c = bignum_emontredc_8n(num, mulres, m, w); + c |= bignum_ge(num, mulres + num, num, m); + // Do the step 2 and store the result at dest (which is rp) + bignum_optsub(num, dest, mulres + num, c, m); + +#else + + // Should not call this function unless s2n-bignum is supported. + abort(); + +#endif +} + +#endif + + int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BN_MONT_CTX *mont, BN_CTX *ctx) { if (a->neg || b->neg) { @@ -437,11 +547,18 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, // This bound is implied by |bn_mont_ctx_set_N_and_n0|. |bn_mul_mont| // allocates |num| words on the stack, so |num| cannot be too large. assert((size_t)num <= BN_MONTGOMERY_MAX_WORDS); - if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) { - // The check above ensures this won't happen. - assert(0); - OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR); - return 0; + + if (montgomery_s2n_bignum_capable() && montgomery_use_s2n_bignum(num)) { + // Do montgomery multiplication using s2n-bignum. + montgomery_s2n_bignum_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, + num); + } else { + if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) { + // The check above ensures this won't happen. 
+ assert(0); + OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR); + return 0; + } } r->neg = 0; r->width = num; diff --git a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h index cd1687b902..c06bd29798 100644 --- a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h +++ b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h @@ -130,3 +130,53 @@ extern void curve25519_x25519_byte_alt(uint8_t res[static 32], const uint8_t sca // another u-coordinate, is saved in |res|. extern void curve25519_x25519base_byte(uint8_t res[static 32], const uint8_t scalar[static 32]); extern void curve25519_x25519base_byte_alt(uint8_t res[static 32], const uint8_t scalar[static 32]); + +// Evaluate z := x^2 where x is a 2048-bit integer. +// Input: x[32]; output: z[64]; temporary buffer: t[>=72] +extern void bignum_ksqr_32_64(uint64_t z[static 64], uint64_t x[static 32], + uint64_t t[static 72]); + +// Evaluate z := x^2 where x is a 1024-bit integer. +// Input: x[16]; output: z[32]; temporary buffer: t[>=24] +extern void bignum_ksqr_16_32(uint64_t z[static 32], uint64_t x[static 16], + uint64_t t[static 24]); + +// Evaluate z := x * y where x and y are 2048-bit integers. +// Inputs: x[32], y[32]; output: z[64]; temporary buffer t[>=96] +extern void bignum_kmul_32_64(uint64_t z[static 64], uint64_t x[static 32], + uint64_t y[static 32], uint64_t t[static 96]); + +// Evaluate z := x * y where x and y are 1024-bit integers. +// Inputs: x[16], y[16]; output: z[32]; temporary buffer t[>=32] +extern void bignum_kmul_16_32(uint64_t z[static 32], uint64_t x[static 16], + uint64_t y[static 16], uint64_t t[static 32]); + +// Extended Montgomery reduce in 8-digit blocks. +// Assumes that z initially holds a 2k-digit bignum z_0, m is a k-digit odd +// bignum and m * w == -1 (mod 2^64). This function also uses z for the output +// as well as returning a carry c of 0 or 1. 
This encodes two numbers: in the +// lower half of the z buffer we have q = z[0..k-1], while the upper half +// together with the carry gives r = 2^{64k}*c + z[k..2k-1]. These values +// satisfy z_0 + q * m = 2^{64k} * r, i.e. r gives a raw (unreduced) Montgomery +// reduction while q gives the multiplier that was used. +// Inputs: z[2*k], m[k], w; outputs: function return (extra result bit) and z[2*k] +extern uint64_t bignum_emontredc_8n(uint64_t k, uint64_t *z, uint64_t *m, + uint64_t w); + +// Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero) +// Inputs: x[k], p, y[k]; outputs: function return (carry-out) and z[k] +extern uint64_t bignum_optsub(uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, + uint64_t *y); + +// Compare bignums, x >= y. +// Inputs: x[m], y[n]; output: function return (1 if x >= y) +extern uint64_t bignum_ge(uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); + +// General big-integer multiplication (z := x * y). +// Inputs: x[m], y[n]; output: z[k]. If k < m+n, the result is truncated. +extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, + uint64_t n, uint64_t *y); + +// General big-integer squaring (z := x^2). +// Inputs: x[m]; output: z[k]. If k < 2m, the result is truncated. 
+extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t m, uint64_t *x); \ No newline at end of file From bccf5d5020d0b0f5768bcb290c755fc4034863be Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 3 Aug 2023 16:14:15 +0000 Subject: [PATCH 02/15] resolve newline error --- third_party/s2n-bignum/include/s2n-bignum_aws-lc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h index c06bd29798..decc80f993 100644 --- a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h +++ b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h @@ -179,4 +179,4 @@ extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, // General big-integer squaring (z := x^2). // Inputs: x[m]; output: z[k]. If k < 2m, the result is truncated. -extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t m, uint64_t *x); \ No newline at end of file +extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t m, uint64_t *x); From 15c75de21ed6010eb12383e2db6a4b5c4616c3f2 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Mon, 7 Aug 2023 17:50:12 +0000 Subject: [PATCH 03/15] Improve comments --- crypto/fipsmodule/bn/montgomery.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index 61ce11995b..cb7c69a731 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -148,9 +148,13 @@ OPENSSL_INLINE int montgomery_s2n_bignum_capable(void) { OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) { #if defined(BN_MONTGOMERY_USE_S2N_BIGNUM) - // Use s2n-bignum's functions only if (1) the ARM architecture has slow - // multipliers, and (2) temporary buffer's size does not exceed - // BN_MONTGOMERY_MAX_WORDS. 
+ // Use s2n-bignum's functions only if + // (1) The ARM architecture has slow multipliers, and + // (2) num (which is the number of words) is multiplie of 8, because + // s2n-bignum's bignum_emontredc_8n requires it + // (3) The word size is 64 bits, and + // (4) Temporary buffer's size used in montgomery_s2n_bignum_mul_mont + // does not exceed BN_MONTGOMERY_MAX_WORDS. return !CRYPTO_is_ARMv8_wide_multiplier_capable() && (num % 8 == 0) && BN_BITS2 == 64 && (2 * (uint64_t)num + 96) <= BN_MONTGOMERY_MAX_WORDS; @@ -468,10 +472,11 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, #if defined(BN_MONTGOMERY_USE_S2N_BIGNUM) // t is a temporary buffer used by big-int multiplication. - // bignum_kmul_32_64 requires 96 words at maximum. + // bignum_kmul_32_64 requires 96 words. uint64_t t[96]; - // l is the output buffer of big-int multiplication. - // Its low num*2 elements are used. + // mulres is the output buffer of big-int multiplication. + // If BN_MONTGOMERY_MAX_WORDS - 96 is larger than num*2, its low num*2 + // elements are used. // It is montgomery_use_s2n_bignum() that checks whether num*2 fits in the // size of mulres array. 
uint64_t mulres[BN_MONTGOMERY_MAX_WORDS - 96]; From 4e1a7cf5fdff7bba92bb7f66b05fd15880e631f4 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Tue, 8 Aug 2023 17:20:06 +0000 Subject: [PATCH 04/15] address comments --- crypto/fipsmodule/bn/montgomery.c | 57 ++++++++----------- .../s2n-bignum/include/s2n-bignum_aws-lc.h | 26 ++++----- 2 files changed, 38 insertions(+), 45 deletions(-) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index cb7c69a731..b93c2d937c 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -133,18 +133,6 @@ #endif -OPENSSL_INLINE int montgomery_s2n_bignum_capable(void) { -#if defined(BN_MONTGOMERY_USE_S2N_BIGNUM) - - return 1; - -#else - - return 0; - -#endif -} - OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) { #if defined(BN_MONTGOMERY_USE_S2N_BIGNUM) @@ -153,8 +141,9 @@ OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) { // (2) num (which is the number of words) is multiplie of 8, because // s2n-bignum's bignum_emontredc_8n requires it // (3) The word size is 64 bits, and - // (4) Temporary buffer's size used in montgomery_s2n_bignum_mul_mont - // does not exceed BN_MONTGOMERY_MAX_WORDS. + // (4) Temporary buffer's size (t and mulres) used in + // montgomery_s2n_bignum_mul_mont does not exceed + // BN_MONTGOMERY_MAX_WORDS. return !CRYPTO_is_ARMv8_wide_multiplier_capable() && (num % 8 == 0) && BN_BITS2 == 64 && (2 * (uint64_t)num + 96) <= BN_MONTGOMERY_MAX_WORDS; @@ -464,6 +453,14 @@ static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a, #if defined(OPENSSL_BN_ASM_MONT) +// Perform montgomery multiplication using s2n-bignum functions. The arguments +// are equivalent to the arguments of bn_mul_mont. +// montgomery_s2n_bignum_mul_mont works only if num is a multiple of 8. For +// num = 32 or num = 16, this uses faster primitives in s2n-bignum. 
+// Additionally, montgomery_s2n_bignum_mul_mont allocates arrays at a stack, and +// large num leads to out of bounds accesses of the arrays. +// montgomery_use_s2n_bignum(num) must be called in advance to check these +// conditions. static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, @@ -481,29 +478,24 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, // size of mulres array. uint64_t mulres[BN_MONTGOMERY_MAX_WORDS - 96]; - // BN_ULONG is uint64_t since BN_BITS2 is 64. - // m is the prime number, and m * w = -1 mod 2^64. - uint64_t *m = (uint64_t *)np; - uint64_t w = (uint64_t)n0[0]; - uint64_t *src = (uint64_t *)ap, *src2 = (uint64_t *)bp; - uint64_t *dest = (uint64_t *)rp; - uint64_t c; + // Given m the prime number stored at np, m * w = -1 mod 2^64. + uint64_t w = n0[0]; if (num == 32) { if (ap == bp) - bignum_ksqr_32_64(mulres, src, t); + bignum_ksqr_32_64(mulres, ap, t); else - bignum_kmul_32_64(mulres, src2, src, t); + bignum_kmul_32_64(mulres, ap, bp, t); } else if (num == 16) { if (ap == bp) - bignum_ksqr_16_32(mulres, src, t); + bignum_ksqr_16_32(mulres, ap, t); else - bignum_kmul_16_32(mulres, src2, src, t); + bignum_kmul_16_32(mulres, ap, bp, t); } else { if (ap == bp) - bignum_sqr(num * 2, mulres, num, src); + bignum_sqr(num * 2, mulres, num, ap); else - bignum_mul(num * 2, mulres, num, src2, num, src); + bignum_mul(num * 2, mulres, num, ap, num, bp); } // Do montgomery reduction. We follow the definition of montgomery reduction @@ -517,10 +509,11 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, // 2. Optionally subtract the result if the (result of step 1) >= m. // The comparison is true if (1) there is an overflow (bignum_emontredc_8n // returns 1), or (2) the upper half mulres is larger than m. 
- c = bignum_emontredc_8n(num, mulres, m, w); - c |= bignum_ge(num, mulres + num, num, m); - // Do the step 2 and store the result at dest (which is rp) - bignum_optsub(num, dest, mulres + num, c, m); + uint64_t c; + c = bignum_emontredc_8n(num, mulres, np, w); + c |= bignum_ge(num, mulres + num, num, np); + // Do the step 2 and store the result at rp + bignum_optsub(num, rp, mulres + num, c, np); #else @@ -553,7 +546,7 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, // allocates |num| words on the stack, so |num| cannot be too large. assert((size_t)num <= BN_MONTGOMERY_MAX_WORDS); - if (montgomery_s2n_bignum_capable() && montgomery_use_s2n_bignum(num)) { + if (montgomery_use_s2n_bignum(num)) { // Do montgomery multiplication using s2n-bignum. montgomery_s2n_bignum_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num); diff --git a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h index decc80f993..11f35f86d0 100644 --- a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h +++ b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h @@ -133,23 +133,23 @@ extern void curve25519_x25519base_byte_alt(uint8_t res[static 32], const uint8_t // Evaluate z := x^2 where x is a 2048-bit integer. // Input: x[32]; output: z[64]; temporary buffer: t[>=72] -extern void bignum_ksqr_32_64(uint64_t z[static 64], uint64_t x[static 32], +extern void bignum_ksqr_32_64(uint64_t z[static 64], const uint64_t x[static 32], uint64_t t[static 72]); // Evaluate z := x^2 where x is a 1024-bit integer. // Input: x[16]; output: z[32]; temporary buffer: t[>=24] -extern void bignum_ksqr_16_32(uint64_t z[static 32], uint64_t x[static 16], +extern void bignum_ksqr_16_32(uint64_t z[static 32], const uint64_t x[static 16], uint64_t t[static 24]); // Evaluate z := x * y where x and y are 2048-bit integers. 
// Inputs: x[32], y[32]; output: z[64]; temporary buffer t[>=96] -extern void bignum_kmul_32_64(uint64_t z[static 64], uint64_t x[static 32], - uint64_t y[static 32], uint64_t t[static 96]); +extern void bignum_kmul_32_64(uint64_t z[static 64], const uint64_t x[static 32], + const uint64_t y[static 32], uint64_t t[static 96]); // Evaluate z := x * y where x and y are 1024-bit integers. // Inputs: x[16], y[16]; output: z[32]; temporary buffer t[>=32] -extern void bignum_kmul_16_32(uint64_t z[static 32], uint64_t x[static 16], - uint64_t y[static 16], uint64_t t[static 32]); +extern void bignum_kmul_16_32(uint64_t z[static 32], const uint64_t x[static 16], + const uint64_t y[static 16], uint64_t t[static 32]); // Extended Montgomery reduce in 8-digit blocks. // Assumes that z initially holds a 2k-digit bignum z_0, m is a k-digit odd @@ -160,23 +160,23 @@ extern void bignum_kmul_16_32(uint64_t z[static 32], uint64_t x[static 16], // satisfy z_0 + q * m = 2^{64k} * r, i.e. r gives a raw (unreduced) Montgomery // reduction while q gives the multiplier that was used. // Inputs: z[2*k], m[k], w; outputs: function return (extra result bit) and z[2*k] -extern uint64_t bignum_emontredc_8n(uint64_t k, uint64_t *z, uint64_t *m, +extern uint64_t bignum_emontredc_8n(uint64_t k, uint64_t *z, const uint64_t *m, uint64_t w); // Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero) // Inputs: x[k], p, y[k]; outputs: function return (carry-out) and z[k] -extern uint64_t bignum_optsub(uint64_t k, uint64_t *z, uint64_t *x, uint64_t p, - uint64_t *y); +extern uint64_t bignum_optsub(uint64_t k, uint64_t *z, const uint64_t *x, uint64_t p, + const uint64_t *y); // Compare bignums, x >= y. // Inputs: x[m], y[n]; output: function return (1 if x >= y) -extern uint64_t bignum_ge(uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); +extern uint64_t bignum_ge(uint64_t m, const uint64_t *x, uint64_t n, const uint64_t *y); // General big-integer multiplication (z := x * y). 
// Inputs: x[m], y[n]; output: z[k]. If k < m+n, the result is truncated. -extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, uint64_t *x, - uint64_t n, uint64_t *y); +extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x, + uint64_t n, const uint64_t *y); // General big-integer squaring (z := x^2). // Inputs: x[m]; output: z[k]. If k < 2m, the result is truncated. -extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t m, uint64_t *x); +extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t m, const uint64_t *x); From 759f4c0134656786cabc99bb86fa3e8b9d247d34 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 10 Aug 2023 03:46:46 +0000 Subject: [PATCH 05/15] improve comments --- crypto/fipsmodule/bn/montgomery.c | 20 +++++++++---------- .../s2n-bignum/include/s2n-bignum_aws-lc.h | 6 ++++++ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index b93c2d937c..b9c60dd4ad 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -501,18 +501,18 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, // Do montgomery reduction. We follow the definition of montgomery reduction // which is: // 1. Calculate (mulres + ((mulres mod R) * (-m^-1 mod R) mod R) * m) / R - // using - // bignum_emontredc_8n, where R is 2^(64*num). - // The calculated result is stored in the upper half elements of the mulres - // buffer. If the result overflows num words, bignum_emontredc_8n - // returns 1. + // using bignum_emontredc_8n, where R is 2^(64*num). + // The calculated result is stored in [mulres+num ... mulres+2*num-1]. If + // the result >= 2^(64*num), bignum_emontredc_8n returns 1. // 2. Optionally subtract the result if the (result of step 1) >= m. - // The comparison is true if (1) there is an overflow (bignum_emontredc_8n - // returns 1), or (2) the upper half mulres is larger than m. 
+ // The comparison is true if either A or B holds: + // A. The result of step 1 >= 2^(64*num), meaning that bignum_emontredc_8n + // returned 1. Since m is less than 2^(64*num), (result of step 1) >= m holds. + // B. The result of step 1 fits in 2^(64*num), and the result >= m. uint64_t c; - c = bignum_emontredc_8n(num, mulres, np, w); - c |= bignum_ge(num, mulres + num, num, np); - // Do the step 2 and store the result at rp + c = bignum_emontredc_8n(num, mulres, np, w); // c: case A + c |= bignum_ge(num, mulres + num, num, np); // c: case B + // Optionally subtract and store the result at rp bignum_optsub(num, rp, mulres + num, c, np); #else diff --git a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h index 11f35f86d0..d1db326d0b 100644 --- a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h +++ b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h @@ -159,6 +159,12 @@ extern void bignum_kmul_16_32(uint64_t z[static 32], const uint64_t x[static 16] // together with the carry gives r = 2^{64k}*c + z[k..2k-1]. These values // satisfy z_0 + q * m = 2^{64k} * r, i.e. r gives a raw (unreduced) Montgomery // reduction while q gives the multiplier that was used. +// Note that q = (z_0 mod 2^{64k}) * (-m^-1 mod 2^{64k}) mod 2^{64k}. +// z_0 + q * m = 0 mod 2^{64k} +// q * m = -z_0 mod 2^{64k} +// q = -z_0 * m^-1 mod 2^{64k} +// = (z_0 mod 2^{64k}) * (-m^-1 mod 2^{64k}) mod 2^{64k} +// q is uniquely determined because q must be in the range of [0, 2^{64k}-1]. 
// Inputs: z[2*k], m[k], w; outputs: function return (extra result bit) and z[2*k] extern uint64_t bignum_emontredc_8n(uint64_t k, uint64_t *z, const uint64_t *m, uint64_t w); From 816e698b437b9d5759ffa9926a5b5c65d2a58435 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 10 Aug 2023 16:08:53 +0000 Subject: [PATCH 06/15] Address comments --- crypto/fipsmodule/bn/montgomery.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index b9c60dd4ad..2ec5b396fc 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -124,18 +124,13 @@ #if !defined(OPENSSL_NO_ASM) && \ (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ - defined(OPENSSL_AARCH64) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX) && \ - defined(OPENSSL_BN_ASM_MONT) + defined(OPENSSL_AARCH64) && defined(OPENSSL_BN_ASM_MONT) #include "../../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" #define BN_MONTGOMERY_USE_S2N_BIGNUM 1 -#endif - OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) { -#if defined(BN_MONTGOMERY_USE_S2N_BIGNUM) - // Use s2n-bignum's functions only if // (1) The ARM architecture has slow multipliers, and // (2) num (which is the number of words) is multiplie of 8, because @@ -146,13 +141,15 @@ OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) { // BN_MONTGOMERY_MAX_WORDS. return !CRYPTO_is_ARMv8_wide_multiplier_capable() && (num % 8 == 0) && BN_BITS2 == 64 && (2 * (uint64_t)num + 96) <= BN_MONTGOMERY_MAX_WORDS; +} #else +OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) { return 0; +} #endif -} BN_MONT_CTX *BN_MONT_CTX_new(void) { BN_MONT_CTX *ret = OPENSSL_malloc(sizeof(BN_MONT_CTX)); @@ -451,8 +448,6 @@ static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a, } -#if defined(OPENSSL_BN_ASM_MONT) - // Perform montgomery multiplication using s2n-bignum functions. 
The arguments // are equivalent to the arguments of bn_mul_mont. // montgomery_s2n_bignum_mul_mont works only if num is a multiple of 8. For @@ -523,8 +518,6 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, #endif } -#endif - int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BN_MONT_CTX *mont, BN_CTX *ctx) { From 83b3af74f347432642d50392526e6a2bec6ef5d8 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 10 Aug 2023 17:34:09 +0000 Subject: [PATCH 07/15] OPENSSL_BN_ASM_MONT --- crypto/fipsmodule/bn/montgomery.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index 2ec5b396fc..fd3c4027f2 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -448,6 +448,8 @@ static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a, } +#if defined(OPENSSL_BN_ASM_MONT) + // Perform montgomery multiplication using s2n-bignum functions. The arguments // are equivalent to the arguments of bn_mul_mont. // montgomery_s2n_bignum_mul_mont works only if num is a multiple of 8. 
For @@ -518,6 +520,8 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, #endif } +#endif + int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BN_MONT_CTX *mont, BN_CTX *ctx) { From c3df1bc4cd2118ec355a0d8103616bbc7afabbdd Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 10 Aug 2023 21:03:16 +0000 Subject: [PATCH 08/15] Fix CMakeLists.txt to link s2n-bignum assembly files in ARM even if MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX is set --- crypto/fipsmodule/CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crypto/fipsmodule/CMakeLists.txt b/crypto/fipsmodule/CMakeLists.txt index 7fba0092a8..7bea12fb5e 100644 --- a/crypto/fipsmodule/CMakeLists.txt +++ b/crypto/fipsmodule/CMakeLists.txt @@ -167,8 +167,9 @@ endif() # s2n-bignum files can be compiled on Unix platforms only (except Apple), # and on x86_64 and aarch64 systems only. -if((ARCH STREQUAL "x86_64" OR ARCH STREQUAL "aarch64") AND - UNIX AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX) +if((((ARCH STREQUAL "x86_64") AND NOT MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX) OR + ARCH STREQUAL "aarch64") AND + UNIX) # Set the source directory for s2n-bignum assembly files if(ARCH STREQUAL "x86_64") From 7c49f8d8833cb833b65825c6710842572ee29f62 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 10 Aug 2023 21:52:44 +0000 Subject: [PATCH 09/15] Let macro checks be consistent with the CMakeList.txt updates in this PR --- crypto/curve25519/curve25519.c | 8 +++++--- crypto/fipsmodule/ec/p384.c | 8 ++++---- crypto/fipsmodule/ec/p521.c | 8 ++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/crypto/curve25519/curve25519.c b/crypto/curve25519/curve25519.c index 169fed01b1..ad346c885c 100644 --- a/crypto/curve25519/curve25519.c +++ b/crypto/curve25519/curve25519.c @@ -31,9 +31,11 @@ #include "../internal.h" #include "../fipsmodule/cpucap/internal.h" -#if (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) && \ - (defined(OPENSSL_LINUX) 
|| defined(OPENSSL_APPLE)) && \ - !defined(OPENSSL_NO_ASM) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX) +#if ((defined(OPENSSL_X86_64) && \ + !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \ + defined(OPENSSL_AARCH64)) && \ + (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ + !defined(OPENSSL_NO_ASM) #include "../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" #define CURVE25519_S2N_BIGNUM_CAPABLE #endif diff --git a/crypto/fipsmodule/ec/p384.c b/crypto/fipsmodule/ec/p384.c index 6941f26264..7d4fccb8b9 100644 --- a/crypto/fipsmodule/ec/p384.c +++ b/crypto/fipsmodule/ec/p384.c @@ -32,10 +32,10 @@ // #define p384_felem_add(out, in0, in1) bignum_add_p384(out, in0, in1) // when s2n-bignum is used. // -#if !defined(OPENSSL_NO_ASM) && \ - (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ - (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) && \ - !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX) +#if !defined(OPENSSL_NO_ASM) && \ + (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ + ((defined(OPENSSL_X86_64) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \ + defined(OPENSSL_AARCH64)) # include "../../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" diff --git a/crypto/fipsmodule/ec/p521.c b/crypto/fipsmodule/ec/p521.c index efbec42a38..23d2802a2a 100644 --- a/crypto/fipsmodule/ec/p521.c +++ b/crypto/fipsmodule/ec/p521.c @@ -33,10 +33,10 @@ // when Fiat-crypto is used, or as: // #define p521_felem_add(out, in0, in1) bignum_add_p521(out, in0, in1) // when s2n-bignum is used. 
-#if !defined(OPENSSL_NO_ASM) && \ - (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ - (defined(OPENSSL_X86_64) || defined(OPENSSL_AARCH64)) && \ - !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX) +#if !defined(OPENSSL_NO_ASM) && \ + (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ + ((defined(OPENSSL_X86_64) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \ + defined(OPENSSL_AARCH64)) # include "../../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" # define P521_USE_S2N_BIGNUM_FIELD_ARITH 1 From d9282838db3343ad130d147ded7313a04ddebcb6 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Fri, 11 Aug 2023 17:09:03 +0000 Subject: [PATCH 10/15] define constant macros specifying the size of buffers for Karatsuba --- crypto/fipsmodule/bn/montgomery.c | 17 +++++++----- .../s2n-bignum/include/s2n-bignum_aws-lc.h | 26 +++++++++++++------ 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index fd3c4027f2..f7621a2a67 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -139,8 +139,13 @@ OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) { // (4) Temporary buffer's size (t and mulres) used in // montgomery_s2n_bignum_mul_mont does not exceed // BN_MONTGOMERY_MAX_WORDS. 
+ assert(S2NBIGNUM_KSQR_16_32_TEMP_NWORDS <= S2NBIGNUM_KMUL_32_64_TEMP_NWORDS && + S2NBIGNUM_KSQR_32_64_TEMP_NWORDS <= S2NBIGNUM_KMUL_32_64_TEMP_NWORDS && + S2NBIGNUM_KMUL_16_32_TEMP_NWORDS <= S2NBIGNUM_KMUL_32_64_TEMP_NWORDS); + const uint64_t temp_buffer_nwords = + S2NBIGNUM_KMUL_32_64_TEMP_NWORDS + 2 * (uint64_t)num; return !CRYPTO_is_ARMv8_wide_multiplier_capable() && (num % 8 == 0) && - BN_BITS2 == 64 && (2 * (uint64_t)num + 96) <= BN_MONTGOMERY_MAX_WORDS; + BN_BITS2 == 64 && temp_buffer_nwords <= BN_MONTGOMERY_MAX_WORDS; } #else @@ -465,15 +470,15 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, #if defined(BN_MONTGOMERY_USE_S2N_BIGNUM) - // t is a temporary buffer used by big-int multiplication. + // t is a temporary buffer used by Karatsuba multiplication. // bignum_kmul_32_64 requires 96 words. - uint64_t t[96]; + uint64_t t[S2NBIGNUM_KMUL_32_64_TEMP_NWORDS]; // mulres is the output buffer of big-int multiplication. - // If BN_MONTGOMERY_MAX_WORDS - 96 is larger than num*2, its low num*2 - // elements are used. + // If BN_MONTGOMERY_MAX_WORDS - S2NBIGNUM_KMUL_32_64_TEMP_NWORDS is larger + // than num*2, its low num*2 elements are used. // It is montgomery_use_s2n_bignum() that checks whether num*2 fits in the // size of mulres array. - uint64_t mulres[BN_MONTGOMERY_MAX_WORDS - 96]; + uint64_t mulres[BN_MONTGOMERY_MAX_WORDS - S2NBIGNUM_KMUL_32_64_TEMP_NWORDS]; // Given m the prime number stored at np, m * w = -1 mod 2^64. uint64_t w = n0[0]; diff --git a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h index d1db326d0b..290fdb66f3 100644 --- a/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h +++ b/third_party/s2n-bignum/include/s2n-bignum_aws-lc.h @@ -133,23 +133,33 @@ extern void curve25519_x25519base_byte_alt(uint8_t res[static 32], const uint8_t // Evaluate z := x^2 where x is a 2048-bit integer. 
// Input: x[32]; output: z[64]; temporary buffer: t[>=72] -extern void bignum_ksqr_32_64(uint64_t z[static 64], const uint64_t x[static 32], - uint64_t t[static 72]); +#define S2NBIGNUM_KSQR_32_64_TEMP_NWORDS 72 +extern void +bignum_ksqr_32_64(uint64_t z[static 64], const uint64_t x[static 32], + uint64_t t[static S2NBIGNUM_KSQR_32_64_TEMP_NWORDS]); // Evaluate z := x^2 where x is a 1024-bit integer. // Input: x[16]; output: z[32]; temporary buffer: t[>=24] -extern void bignum_ksqr_16_32(uint64_t z[static 32], const uint64_t x[static 16], - uint64_t t[static 24]); +#define S2NBIGNUM_KSQR_16_32_TEMP_NWORDS 24 +extern void +bignum_ksqr_16_32(uint64_t z[static 32], const uint64_t x[static 16], + uint64_t t[static S2NBIGNUM_KSQR_16_32_TEMP_NWORDS]); // Evaluate z := x * y where x and y are 2048-bit integers. // Inputs: x[32], y[32]; output: z[64]; temporary buffer t[>=96] -extern void bignum_kmul_32_64(uint64_t z[static 64], const uint64_t x[static 32], - const uint64_t y[static 32], uint64_t t[static 96]); +#define S2NBIGNUM_KMUL_32_64_TEMP_NWORDS 96 +extern void +bignum_kmul_32_64(uint64_t z[static 64], const uint64_t x[static 32], + const uint64_t y[static 32], + uint64_t t[static S2NBIGNUM_KMUL_32_64_TEMP_NWORDS]); // Evaluate z := x * y where x and y are 1024-bit integers. // Inputs: x[16], y[16]; output: z[32]; temporary buffer t[>=32] -extern void bignum_kmul_16_32(uint64_t z[static 32], const uint64_t x[static 16], - const uint64_t y[static 16], uint64_t t[static 32]); +#define S2NBIGNUM_KMUL_16_32_TEMP_NWORDS 32 +extern void +bignum_kmul_16_32(uint64_t z[static 32], const uint64_t x[static 16], + const uint64_t y[static 16], + uint64_t t[static S2NBIGNUM_KMUL_16_32_TEMP_NWORDS]); // Extended Montgomery reduce in 8-digit blocks. 
// Assumes that z initially holds a 2k-digit bignum z_0, m is a k-digit odd From 5ac5af0c80fe2e3067bfb2477409a05517fd9bb3 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Wed, 16 Aug 2023 15:58:44 +0000 Subject: [PATCH 11/15] assert that BN_BITS2 is 64 --- crypto/fipsmodule/bn/montgomery.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index f7621a2a67..4e1309ba7b 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -142,10 +142,11 @@ OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) { assert(S2NBIGNUM_KSQR_16_32_TEMP_NWORDS <= S2NBIGNUM_KMUL_32_64_TEMP_NWORDS && S2NBIGNUM_KSQR_32_64_TEMP_NWORDS <= S2NBIGNUM_KMUL_32_64_TEMP_NWORDS && S2NBIGNUM_KMUL_16_32_TEMP_NWORDS <= S2NBIGNUM_KMUL_32_64_TEMP_NWORDS); + assert(BN_BITS2 == 64); const uint64_t temp_buffer_nwords = S2NBIGNUM_KMUL_32_64_TEMP_NWORDS + 2 * (uint64_t)num; return !CRYPTO_is_ARMv8_wide_multiplier_capable() && (num % 8 == 0) && - BN_BITS2 == 64 && temp_buffer_nwords <= BN_MONTGOMERY_MAX_WORDS; + temp_buffer_nwords <= BN_MONTGOMERY_MAX_WORDS; } #else From 3484e65d016893c0e534b4ad053e8626caa6b94c Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Wed, 16 Aug 2023 19:16:10 +0000 Subject: [PATCH 12/15] Use BN_MONTGOMERY_S2N_BIGNUM_CAPABLE as elliptic curve's macros do, add comments --- crypto/curve25519/curve25519.c | 2 ++ crypto/fipsmodule/bn/montgomery.c | 4 ++-- crypto/fipsmodule/ec/p384.c | 4 +++- crypto/fipsmodule/ec/p521.c | 4 +++- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/crypto/curve25519/curve25519.c b/crypto/curve25519/curve25519.c index ad346c885c..a184d06b7d 100644 --- a/crypto/curve25519/curve25519.c +++ b/crypto/curve25519/curve25519.c @@ -31,6 +31,8 @@ #include "../internal.h" #include "../fipsmodule/cpucap/internal.h" +// If (1) x86 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not +// set, s2n-bignum path is capable. 
#if ((defined(OPENSSL_X86_64) && \ !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \ defined(OPENSSL_AARCH64)) && \ diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index 4e1309ba7b..2296501255 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -128,7 +128,7 @@ #include "../../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" -#define BN_MONTGOMERY_USE_S2N_BIGNUM 1 +#define BN_MONTGOMERY_S2N_BIGNUM_CAPABLE 1 OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) { // Use s2n-bignum's functions only if @@ -469,7 +469,7 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *np, const BN_ULONG *n0, size_t num) { -#if defined(BN_MONTGOMERY_USE_S2N_BIGNUM) +#if defined(BN_MONTGOMERY_S2N_BIGNUM_CAPABLE) // t is a temporary buffer used by Karatsuba multiplication. // bignum_kmul_32_64 requires 96 words. diff --git a/crypto/fipsmodule/ec/p384.c b/crypto/fipsmodule/ec/p384.c index 7d4fccb8b9..48bba36263 100644 --- a/crypto/fipsmodule/ec/p384.c +++ b/crypto/fipsmodule/ec/p384.c @@ -32,10 +32,12 @@ // #define p384_felem_add(out, in0, in1) bignum_add_p384(out, in0, in1) // when s2n-bignum is used. // +// If (1) x86 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not +// set, s2n-bignum path is capable. #if !defined(OPENSSL_NO_ASM) && \ (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ ((defined(OPENSSL_X86_64) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \ - defined(OPENSSL_AARCH64)) + defined(OPENSSL_AARCH64)) # include "../../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" diff --git a/crypto/fipsmodule/ec/p521.c b/crypto/fipsmodule/ec/p521.c index 23d2802a2a..db3fd66aa9 100644 --- a/crypto/fipsmodule/ec/p521.c +++ b/crypto/fipsmodule/ec/p521.c @@ -33,10 +33,12 @@ // when Fiat-crypto is used, or as: // #define p521_felem_add(out, in0, in1) bignum_add_p521(out, in0, in1) // when s2n-bignum is used. 
+// If (1) x86 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not +// set, s2n-bignum path is capable. #if !defined(OPENSSL_NO_ASM) && \ (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ ((defined(OPENSSL_X86_64) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \ - defined(OPENSSL_AARCH64)) + defined(OPENSSL_AARCH64)) # include "../../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h" # define P521_USE_S2N_BIGNUM_FIELD_ARITH 1 From 4b09f7947b3303cc178e04e1cff281a801f8468f Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Wed, 16 Aug 2023 23:27:30 +0000 Subject: [PATCH 13/15] update the stack allocation size --- crypto/fipsmodule/bn/montgomery.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index 2296501255..108427ef51 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -134,19 +134,13 @@ OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) { // Use s2n-bignum's functions only if // (1) The ARM architecture has slow multipliers, and // (2) num (which is the number of words) is multiplie of 8, because - // s2n-bignum's bignum_emontredc_8n requires it - // (3) The word size is 64 bits, and - // (4) Temporary buffer's size (t and mulres) used in - // montgomery_s2n_bignum_mul_mont does not exceed - // BN_MONTGOMERY_MAX_WORDS. + // s2n-bignum's bignum_emontredc_8n requires it, and + // (3) The word size is 64 bits. 
assert(S2NBIGNUM_KSQR_16_32_TEMP_NWORDS <= S2NBIGNUM_KMUL_32_64_TEMP_NWORDS && S2NBIGNUM_KSQR_32_64_TEMP_NWORDS <= S2NBIGNUM_KMUL_32_64_TEMP_NWORDS && S2NBIGNUM_KMUL_16_32_TEMP_NWORDS <= S2NBIGNUM_KMUL_32_64_TEMP_NWORDS); assert(BN_BITS2 == 64); - const uint64_t temp_buffer_nwords = - S2NBIGNUM_KMUL_32_64_TEMP_NWORDS + 2 * (uint64_t)num; - return !CRYPTO_is_ARMv8_wide_multiplier_capable() && (num % 8 == 0) && - temp_buffer_nwords <= BN_MONTGOMERY_MAX_WORDS; + return !CRYPTO_is_ARMv8_wide_multiplier_capable() && (num % 8 == 0); } #else @@ -472,14 +466,12 @@ static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, #if defined(BN_MONTGOMERY_S2N_BIGNUM_CAPABLE) // t is a temporary buffer used by Karatsuba multiplication. - // bignum_kmul_32_64 requires 96 words. + // bignum_kmul_32_64 requires S2NBIGNUM_KMUL_32_64_TEMP_NWORDS words. uint64_t t[S2NBIGNUM_KMUL_32_64_TEMP_NWORDS]; - // mulres is the output buffer of big-int multiplication. - // If BN_MONTGOMERY_MAX_WORDS - S2NBIGNUM_KMUL_32_64_TEMP_NWORDS is larger - // than num*2, its low num*2 elements are used. - // It is montgomery_use_s2n_bignum() that checks whether num*2 fits in the - // size of mulres array. - uint64_t mulres[BN_MONTGOMERY_MAX_WORDS - S2NBIGNUM_KMUL_32_64_TEMP_NWORDS]; + // mulres is the output buffer of big-int multiplication which uses + // 2 * num elements of mulres. Note that num <= BN_MONTGOMERY_MAX_WORDS + // is guaranteed by the caller (BN_mod_mul_montgomery). + uint64_t mulres[2 * BN_MONTGOMERY_MAX_WORDS]; // Given m the prime number stored at np, m * w = -1 mod 2^64. 
uint64_t w = n0[0]; From 05c8a3d3dac403025319a24c2baaf15f81fd8df6 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Wed, 16 Aug 2023 23:58:19 +0000 Subject: [PATCH 14/15] Update comment --- crypto/fipsmodule/bn/montgomery.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/crypto/fipsmodule/bn/montgomery.c b/crypto/fipsmodule/bn/montgomery.c index 108427ef51..b6afa5571a 100644 --- a/crypto/fipsmodule/bn/montgomery.c +++ b/crypto/fipsmodule/bn/montgomery.c @@ -452,12 +452,12 @@ static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a, // Perform montgomery multiplication using s2n-bignum functions. The arguments // are equivalent to the arguments of bn_mul_mont. -// montgomery_s2n_bignum_mul_mont works only if num is a multiple of 8. For -// num = 32 or num = 16, this uses faster primitives in s2n-bignum. -// Additionally, montgomery_s2n_bignum_mul_mont allocates arrays at a stack, and -// large num leads to out of bounds accesses of the arrays. -// montgomery_use_s2n_bignum(num) must be called in advance to check these -// conditions. +// montgomery_s2n_bignum_mul_mont works only if num is a multiple of 8. +// montgomery_use_s2n_bignum(num) must be called in advance to check this +// condition. +// For num = 32 or num = 16, this uses faster primitives in s2n-bignum. +// montgomery_s2n_bignum_mul_mont allocates S2NBIGNUM_KMUL_32_64_TEMP_NWORDS + +// 2 * BN_MONTGOMERY_MAX_WORDS uint64_t words on the stack.
static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, From fc4d6ff54b0cedd3efb12d31fd94b1303ea3f112 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Thu, 17 Aug 2023 15:56:17 +0000 Subject: [PATCH 15/15] x86 -> x86_64 --- crypto/curve25519/curve25519.c | 2 +- crypto/fipsmodule/ec/p384.c | 2 +- crypto/fipsmodule/ec/p521.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/crypto/curve25519/curve25519.c b/crypto/curve25519/curve25519.c index a184d06b7d..7dea771e12 100644 --- a/crypto/curve25519/curve25519.c +++ b/crypto/curve25519/curve25519.c @@ -31,7 +31,7 @@ #include "../internal.h" #include "../fipsmodule/cpucap/internal.h" -// If (1) x86 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not +// If (1) x86_64 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not // set, s2n-bignum path is capable. #if ((defined(OPENSSL_X86_64) && \ !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)) || \ diff --git a/crypto/fipsmodule/ec/p384.c b/crypto/fipsmodule/ec/p384.c index 48bba36263..716e9b409b 100644 --- a/crypto/fipsmodule/ec/p384.c +++ b/crypto/fipsmodule/ec/p384.c @@ -32,7 +32,7 @@ // #define p384_felem_add(out, in0, in1) bignum_add_p384(out, in0, in1) // when s2n-bignum is used. // -// If (1) x86 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not +// If (1) x86_64 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not // set, s2n-bignum path is capable. #if !defined(OPENSSL_NO_ASM) && \ (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \ diff --git a/crypto/fipsmodule/ec/p521.c b/crypto/fipsmodule/ec/p521.c index db3fd66aa9..cfb2f65daf 100644 --- a/crypto/fipsmodule/ec/p521.c +++ b/crypto/fipsmodule/ec/p521.c @@ -33,7 +33,7 @@ // when Fiat-crypto is used, or as: // #define p521_felem_add(out, in0, in1) bignum_add_p521(out, in0, in1) // when s2n-bignum is used. 
-// If (1) x86 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not +// If (1) x86_64 or aarch64, (2) linux or apple, and (3) OPENSSL_NO_ASM is not // set, s2n-bignum path is capable. #if !defined(OPENSSL_NO_ASM) && \ (defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \