Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update montgomery multiplication to use s2n-bignum's verified scalar bignum functions #1135

Merged
merged 21 commits into from
Aug 17, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions crypto/fipsmodule/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,20 @@ if((ARCH STREQUAL "x86_64" OR ARCH STREQUAL "aarch64") AND
curve25519/curve25519_x25519base_byte.S
curve25519/curve25519_x25519base_byte_alt.S
)

# Big-integer arithmetic routines from s2n-bignum. These are the assembly
# sources behind the bignum_* functions that
# crypto/fipsmodule/bn/montgomery.c calls for Montgomery multiplication:
# fixed-size multiply/square and Montgomery reduction (fastmul/), and
# generic-length compare, multiply, optional subtract and square (generic/).
list(APPEND S2N_BIGNUM_ASM_SOURCES
fastmul/bignum_kmul_16_32.S
fastmul/bignum_kmul_32_64.S
fastmul/bignum_ksqr_16_32.S
fastmul/bignum_ksqr_32_64.S
fastmul/bignum_emontredc_8n.S

generic/bignum_ge.S
generic/bignum_mul.S
generic/bignum_optsub.S
generic/bignum_sqr.S
)
endif()
endif()

Expand Down
127 changes: 122 additions & 5 deletions crypto/fipsmodule/bn/montgomery.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,47 @@
#include <openssl/type_check.h>

#include "internal.h"
#include "../cpucap/internal.h"
#include "../../internal.h"

#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_LINUX) || defined(OPENSSL_APPLE)) && \
defined(OPENSSL_AARCH64) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX) && \
aqjune-aws marked this conversation as resolved.
Show resolved Hide resolved
defined(OPENSSL_BN_ASM_MONT)

#include "../../../third_party/s2n-bignum/include/s2n-bignum_aws-lc.h"

#define BN_MONTGOMERY_USE_S2N_BIGNUM 1
aqjune-aws marked this conversation as resolved.
Show resolved Hide resolved

#endif

// montgomery_s2n_bignum_capable returns one if this build was compiled with
// support for s2n-bignum based Montgomery multiplication (that is, if
// BN_MONTGOMERY_USE_S2N_BIGNUM is defined) and zero otherwise. The answer is
// fixed at compile time; it does not inspect the CPU.
OPENSSL_INLINE int montgomery_s2n_bignum_capable(void) {
#if defined(BN_MONTGOMERY_USE_S2N_BIGNUM)
  return 1;
#else
  return 0;
#endif
}

// montgomery_use_s2n_bignum returns one if a Montgomery multiplication with
// |num|-word operands should be carried out with s2n-bignum's functions, and
// zero otherwise. In builds where BN_MONTGOMERY_USE_S2N_BIGNUM is not defined
// it always returns zero.
OPENSSL_INLINE int montgomery_use_s2n_bignum(unsigned int num) {
#if defined(BN_MONTGOMERY_USE_S2N_BIGNUM)
  // Use s2n-bignum's functions only if all of the following hold:
  // (1) the CPU is not reported as wide-multiplier capable, i.e. the ARM
  //     architecture has slow multipliers,
  // (2) |num| is a multiple of 8, since the reduction step is done with
  //     bignum_emontredc_8n,
  // (3) a word is 64 bits wide, and
  // (4) the 2 * |num| words of the double-width product plus the 96 words of
  //     scratch space do not exceed BN_MONTGOMERY_MAX_WORDS. The sum is
  //     computed in 64 bits so that it cannot wrap for any |num|.
  return !CRYPTO_is_ARMv8_wide_multiplier_capable() && (num % 8 == 0) &&
         BN_BITS2 == 64 && (2 * (uint64_t)num + 96) <= BN_MONTGOMERY_MAX_WORDS;
#else
  return 0;
#endif
}

BN_MONT_CTX *BN_MONT_CTX_new(void) {
BN_MONT_CTX *ret = OPENSSL_malloc(sizeof(BN_MONT_CTX));
Expand Down Expand Up @@ -418,6 +457,77 @@ static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a,
return ret;
}


#if defined(OPENSSL_BN_ASM_MONT)
aqjune-aws marked this conversation as resolved.
Show resolved Hide resolved

// montgomery_s2n_bignum_mul_mont computes the Montgomery product of |ap| and
// |bp| modulo |np| using s2n-bignum and writes the |num|-word result to |rp|.
// |n0[0]| must satisfy np * n0[0] == -1 (mod 2^64). When |ap| and |bp| are the
// same pointer the dedicated squaring routines are used.
//
// The caller must have checked montgomery_use_s2n_bignum(num) first: that
// check is what guarantees the on-stack buffers below are large enough. In
// builds without BN_MONTGOMERY_USE_S2N_BIGNUM this function aborts.
static void montgomery_s2n_bignum_mul_mont(BN_ULONG *rp, const BN_ULONG *ap,
                                           const BN_ULONG *bp,
                                           const BN_ULONG *np,
                                           const BN_ULONG *n0, size_t num) {
#if defined(BN_MONTGOMERY_USE_S2N_BIGNUM)

  // t is a temporary buffer used by the fixed-size multiplication and squaring
  // routines. bignum_kmul_32_64 needs the most scratch space: 96 words.
  uint64_t t[96];
  // mulres is the output buffer of the big-integer multiplication. Only its
  // low num*2 elements are used. It is montgomery_use_s2n_bignum() that checks
  // whether num*2 fits in the size of the mulres array.
  uint64_t mulres[BN_MONTGOMERY_MAX_WORDS - 96];

  // Debug-build guard mirroring the checks in montgomery_use_s2n_bignum(), so
  // that a caller that skipped them cannot silently overrun |mulres|.
  assert(num % 8 == 0);
  assert(num <= (sizeof(mulres) / sizeof(mulres[0])) / 2);

  // BN_ULONG is uint64_t since BN_BITS2 is 64.
  // m is the (odd) modulus, and m * w = -1 mod 2^64.
  uint64_t *m = (uint64_t *)np;
  uint64_t w = (uint64_t)n0[0];
  uint64_t *src = (uint64_t *)ap, *src2 = (uint64_t *)bp;
  uint64_t *dest = (uint64_t *)rp;
  uint64_t c;

  // Step 0: compute the full 2*num-word product (or square) into mulres,
  // picking the fixed-size routine when one exists for this operand size.
  if (num == 32) {
    if (ap == bp) {
      bignum_ksqr_32_64(mulres, src, t);
    } else {
      bignum_kmul_32_64(mulres, src2, src, t);
    }
  } else if (num == 16) {
    if (ap == bp) {
      bignum_ksqr_16_32(mulres, src, t);
    } else {
      bignum_kmul_16_32(mulres, src2, src, t);
    }
  } else {
    if (ap == bp) {
      bignum_sqr(num * 2, mulres, num, src);
    } else {
      bignum_mul(num * 2, mulres, num, src2, num, src);
    }
  }

  // Do Montgomery reduction. We follow the definition of Montgomery reduction,
  // which is:
  // 1. Calculate (mulres + ((mulres mod R) * (-m^-1 mod R) mod R) * m) / R
  //    using bignum_emontredc_8n, where R is 2^(64*num).
  //    The calculated result is stored in the upper half of the mulres
  //    buffer. If the result overflows num words, bignum_emontredc_8n
  //    returns 1.
  // 2. Subtract m from the result of step 1 if that result is >= m.
  //    The comparison is true if (1) there is an overflow
  //    (bignum_emontredc_8n returns 1), or (2) the upper half of mulres is
  //    greater than or equal to m.
  c = bignum_emontredc_8n(num, mulres, m, w);
  c |= bignum_ge(num, mulres + num, num, m);
  // Do step 2 and store the result at dest (which is rp).
  bignum_optsub(num, dest, mulres + num, c, m);

#else

  // Should not call this function unless s2n-bignum is supported.
  abort();

#endif
}

#endif


int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BN_MONT_CTX *mont, BN_CTX *ctx) {
if (a->neg || b->neg) {
Expand All @@ -437,11 +547,18 @@ int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
// This bound is implied by |bn_mont_ctx_set_N_and_n0|. |bn_mul_mont|
// allocates |num| words on the stack, so |num| cannot be too large.
assert((size_t)num <= BN_MONTGOMERY_MAX_WORDS);
if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
// The check above ensures this won't happen.
assert(0);
OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
return 0;

if (montgomery_s2n_bignum_capable() && montgomery_use_s2n_bignum(num)) {
// Do montgomery multiplication using s2n-bignum.
montgomery_s2n_bignum_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0,
num);
} else {
if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
// The check above ensures this won't happen.
assert(0);
OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
return 0;
}
}
r->neg = 0;
r->width = num;
Expand Down
50 changes: 50 additions & 0 deletions third_party/s2n-bignum/include/s2n-bignum_aws-lc.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,53 @@ extern void curve25519_x25519_byte_alt(uint8_t res[static 32], const uint8_t sca
// another u-coordinate, is saved in |res|.
extern void curve25519_x25519base_byte(uint8_t res[static 32], const uint8_t scalar[static 32]);
extern void curve25519_x25519base_byte_alt(uint8_t res[static 32], const uint8_t scalar[static 32]);

// Evaluate z := x^2 where x is a 2048-bit integer.
// x is 32 64-bit words and z is 64 64-bit words; t is caller-provided scratch
// space of at least 72 words.
// Input: x[32]; output: z[64]; temporary buffer: t[>=72]
extern void bignum_ksqr_32_64(uint64_t z[static 64], uint64_t x[static 32],
uint64_t t[static 72]);

// Evaluate z := x^2 where x is a 1024-bit integer.
// x is 16 64-bit words and z is 32 64-bit words; t is caller-provided scratch
// space of at least 24 words.
// Input: x[16]; output: z[32]; temporary buffer: t[>=24]
extern void bignum_ksqr_16_32(uint64_t z[static 32], uint64_t x[static 16],
uint64_t t[static 24]);

// Evaluate z := x * y where x and y are 2048-bit integers.
// x and y are 32 64-bit words each and z is 64 64-bit words; t is
// caller-provided scratch space of at least 96 words.
// Inputs: x[32], y[32]; output: z[64]; temporary buffer t[>=96]
extern void bignum_kmul_32_64(uint64_t z[static 64], uint64_t x[static 32],
uint64_t y[static 32], uint64_t t[static 96]);

// Evaluate z := x * y where x and y are 1024-bit integers.
// x and y are 16 64-bit words each and z is 32 64-bit words; t is
// caller-provided scratch space of at least 32 words.
// Inputs: x[16], y[16]; output: z[32]; temporary buffer t[>=32]
extern void bignum_kmul_16_32(uint64_t z[static 32], uint64_t x[static 16],
uint64_t y[static 16], uint64_t t[static 32]);

// Extended Montgomery reduce in 8-digit blocks.
// Assumes that z initially holds a 2k-digit bignum z_0, m is a k-digit odd
// bignum and m * w == -1 (mod 2^64). This function also uses z for the output
// as well as returning a carry c of 0 or 1. This encodes two numbers: in the
// lower half of the z buffer we have q = z[0..k-1], while the upper half
// together with the carry gives r = 2^{64k}*c + z[k..2k-1]. These values
// satisfy z_0 + q * m = 2^{64k} * r, i.e. r gives a raw (unreduced) Montgomery
// reduction while q gives the multiplier that was used.
// NOTE(review): the "8n" suffix and "8-digit blocks" suggest k must be a
// multiple of 8 -- confirm against the s2n-bignum documentation.
// Inputs: z[2*k], m[k], w; outputs: function return (extra result bit) and z[2*k]
extern uint64_t bignum_emontredc_8n(uint64_t k, uint64_t *z, uint64_t *m,
uint64_t w);

// Optionally subtract, z := x - y (if p nonzero) or z := x (if p zero)
// k is the number of 64-bit words in each of x, y and z.
// Inputs: x[k], p, y[k]; outputs: function return (carry-out) and z[k]
extern uint64_t bignum_optsub(uint64_t k, uint64_t *z, uint64_t *x, uint64_t p,
uint64_t *y);

// Compare bignums, x >= y.
// m and n are the word counts of x and y respectively; they are passed
// separately, so the two operands may have different lengths.
// Inputs: x[m], y[n]; output: function return (1 if x >= y)
extern uint64_t bignum_ge(uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);

// General big-integer multiplication (z := x * y).
// k, m and n are the word counts of z, x and y respectively.
// Inputs: x[m], y[n]; output: z[k]. If k < m+n, the result is truncated.
extern void bignum_mul(uint64_t k, uint64_t *z, uint64_t m, uint64_t *x,
uint64_t n, uint64_t *y);

// General big-integer squaring (z := x^2).
// k and m are the word counts of z and x respectively.
// Inputs: x[m]; output: z[k]. If k < 2m, the result is truncated.
extern void bignum_sqr(uint64_t k, uint64_t *z, uint64_t m, uint64_t *x);