-
Notifications
You must be signed in to change notification settings - Fork 125
/
Copy pathec_nistp.c
347 lines (302 loc) · 12.5 KB
/
ec_nistp.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
// In this file we will implement elliptic curve point operations for
// NIST curves P-256, P-384, and P-521. The idea is to implement the operations
// in a generic way such that the code can be reused instead of having
// a separate implementation for each of the curves. We implement:
// 1. point addition,
// 2. point doubling,
// 3. scalar multiplication of a base point,
// 4. scalar multiplication of an arbitrary point,
// 5. scalar multiplication of a base and an arbitrary point.
//
// Matrix of what has been done so far:
//
// | op | P-521 | P-384 | P-256 |
// |----------------------------|
// | 1. | x | x | x* |
// | 2. | x | x | x* |
// | 3. | | | |
// | 4. | | | |
// | 5. | | | |
// * For P-256, only the Fiat-crypto implementation in p256.c is replaced.
#include "ec_nistp.h"
// Some of the functions below need temporary field element variables.
// To avoid dynamic allocation we define nistp_felem type to have the maximum
// size possible (which is currently P-521 curve). The values are hard-coded
// for the moment, this will be fixed when we migrate the whole P-521
// implementation to ec_nistp.c.
#if defined(EC_NISTP_USE_64BIT_LIMB)
#define NISTP_FELEM_MAX_NUM_OF_LIMBS (9)
#else
#define NISTP_FELEM_MAX_NUM_OF_LIMBS (19)
#endif
typedef ec_nistp_felem_limb ec_nistp_felem[NISTP_FELEM_MAX_NUM_OF_LIMBS];
// Conditional copy in constant-time (out = t == 0 ? z : nz).
static void cmovznz(ec_nistp_felem_limb *out,
size_t num_limbs,
ec_nistp_felem_limb t,
const ec_nistp_felem_limb *z,
const ec_nistp_felem_limb *nz) {
ec_nistp_felem_limb mask = constant_time_is_zero_w(t);
for (size_t i = 0; i < num_limbs; i++) {
out[i] = constant_time_select_w(mask, z[i], nz[i]);
}
}
// Group operations
// ----------------
//
// Building on top of the field operations we have the operations on the
// elliptic curve group itself. Points on the curve are represented in Jacobian
// coordinates.
//
// ec_nistp_point_double calculates 2*(x_in, y_in, z_in)
//
// The method is based on:
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
// for which there is a Coq transcription and correctness proof:
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
//
// However, we slighty changed the computation for efficiency (see the full
// explanation within the function body), which makes the Coq proof above
// not applicable to our implementation.
// TODO(awslc): Write a Coq correctness proof for our version of the algorithm.
//
// Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed;
// while x_out == y_in is not (maybe this works, but it's not tested).
void ec_nistp_point_double(const ec_nistp_meth *ctx,
ec_nistp_felem_limb *x_out,
ec_nistp_felem_limb *y_out,
ec_nistp_felem_limb *z_out,
const ec_nistp_felem_limb *x_in,
const ec_nistp_felem_limb *y_in,
const ec_nistp_felem_limb *z_in) {
ec_nistp_felem delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
// delta = z^2
ctx->felem_sqr(delta, z_in);
// gamma = y^2
ctx->felem_sqr(gamma, y_in);
// beta = x*gamma
ctx->felem_mul(beta, x_in, gamma);
// alpha = 3*(x-delta)*(x+delta)
ctx->felem_sub(ftmp, x_in, delta);
ctx->felem_add(ftmp2, x_in, delta);
ctx->felem_add(tmptmp, ftmp2, ftmp2);
ctx->felem_add(ftmp2, ftmp2, tmptmp);
ctx->felem_mul(alpha, ftmp, ftmp2);
// x' = alpha^2 - 8*beta
ctx->felem_sqr(x_out, alpha);
ctx->felem_add(fourbeta, beta, beta);
ctx->felem_add(fourbeta, fourbeta, fourbeta);
ctx->felem_add(tmptmp, fourbeta, fourbeta);
ctx->felem_sub(x_out, x_out, tmptmp);
// z' = (y + z)^2 - gamma - delta
// The following calculation differs from the Coq proof cited above.
// The proof is for:
// add(delta, gamma, delta);
// add(ftmp, y_in, z_in);
// square(z_out, ftmp);
// sub(z_out, z_out, delta);
// Our operations sequence is a bit more efficient because it saves us
// a certain number of conditional moves.
ctx->felem_add(ftmp, y_in, z_in);
ctx->felem_sqr(z_out, ftmp);
ctx->felem_sub(z_out, z_out, gamma);
ctx->felem_sub(z_out, z_out, delta);
// y' = alpha*(4*beta - x') - 8*gamma^2
ctx->felem_sub(y_out, fourbeta, x_out);
ctx->felem_add(gamma, gamma, gamma);
ctx->felem_sqr(gamma, gamma);
ctx->felem_mul(y_out, alpha, y_out);
ctx->felem_add(gamma, gamma, gamma);
ctx->felem_sub(y_out, y_out, gamma);
}
// ec_nistp_point_add calculates (x1, y1, z1) + (x2, y2, z2)
//
// The method is taken from:
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
// adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
//
// Coq transcription and correctness proof:
// <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L467>
// <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L544>
//
// This function includes a branch for checking whether the two input points
// are equal, (while not equal to the point at infinity). This case should
// never happen during single point multiplication, so there is no timing leak
// for ECDH and ECDSA.
void ec_nistp_point_add(const ec_nistp_meth *ctx,
ec_nistp_felem_limb *x3,
ec_nistp_felem_limb *y3,
ec_nistp_felem_limb *z3,
const ec_nistp_felem_limb *x1,
const ec_nistp_felem_limb *y1,
const ec_nistp_felem_limb *z1,
const int mixed,
const ec_nistp_felem_limb *x2,
const ec_nistp_felem_limb *y2,
const ec_nistp_felem_limb *z2) {
ec_nistp_felem x_out, y_out, z_out;
ec_nistp_felem_limb z1nz = ctx->felem_nz(z1);
ec_nistp_felem_limb z2nz = ctx->felem_nz(z2);
// z1z1 = z1**2
ec_nistp_felem z1z1;
ctx->felem_sqr(z1z1, z1);
ec_nistp_felem u1, s1, two_z1z2;
if (!mixed) {
// z2z2 = z2**2
ec_nistp_felem z2z2;
ctx->felem_sqr(z2z2, z2);
// u1 = x1*z2z2
ctx->felem_mul(u1, x1, z2z2);
// two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
ctx->felem_add(two_z1z2, z1, z2);
ctx->felem_sqr(two_z1z2, two_z1z2);
ctx->felem_sub(two_z1z2, two_z1z2, z1z1);
ctx->felem_sub(two_z1z2, two_z1z2, z2z2);
// s1 = y1 * z2**3
ctx->felem_mul(s1, z2, z2z2);
ctx->felem_mul(s1, s1, y1);
} else {
// We'll assume z2 = 1 (special case z2 = 0 is handled later).
// u1 = x1*z2z2
OPENSSL_memcpy(u1, x1, ctx->felem_num_limbs * sizeof(ec_nistp_felem_limb));
// two_z1z2 = 2z1z2
ctx->felem_add(two_z1z2, z1, z1);
// s1 = y1 * z2**3
OPENSSL_memcpy(s1, y1, ctx->felem_num_limbs * sizeof(ec_nistp_felem_limb));
}
// u2 = x2*z1z1
ec_nistp_felem u2;
ctx->felem_mul(u2, x2, z1z1);
// h = u2 - u1
ec_nistp_felem h;
ctx->felem_sub(h, u2, u1);
ec_nistp_felem_limb xneq = ctx->felem_nz(h);
// z_out = two_z1z2 * h
ctx->felem_mul(z_out, h, two_z1z2);
// z1z1z1 = z1 * z1z1
ec_nistp_felem z1z1z1;
ctx->felem_mul(z1z1z1, z1, z1z1);
// s2 = y2 * z1**3
ec_nistp_felem s2;
ctx->felem_mul(s2, y2, z1z1z1);
// r = (s2 - s1)*2
ec_nistp_felem r;
ctx->felem_sub(r, s2, s1);
ctx->felem_add(r, r, r);
ec_nistp_felem_limb yneq = ctx->felem_nz(r);
// This case will never occur in the constant-time |ec_GFp_mont_mul|.
ec_nistp_felem_limb is_nontrivial_double =
constant_time_is_zero_w(xneq | yneq) &
~constant_time_is_zero_w(z1nz) &
~constant_time_is_zero_w(z2nz);
if (constant_time_declassify_w(is_nontrivial_double)) {
ec_nistp_point_double(ctx, x3, y3, z3, x1, y1, z1);
return;
}
// I = (2h)**2
ec_nistp_felem i;
ctx->felem_add(i, h, h);
ctx->felem_sqr(i, i);
// J = h * I
ec_nistp_felem j;
ctx->felem_mul(j, h, i);
// V = U1 * I
ec_nistp_felem v;
ctx->felem_mul(v, u1, i);
// x_out = r**2 - J - 2V
ctx->felem_sqr(x_out, r);
ctx->felem_sub(x_out, x_out, j);
ctx->felem_sub(x_out, x_out, v);
ctx->felem_sub(x_out, x_out, v);
// y_out = r(V-x_out) - 2 * s1 * J
ctx->felem_sub(y_out, v, x_out);
ctx->felem_mul(y_out, y_out, r);
ec_nistp_felem s1j;
ctx->felem_mul(s1j, s1, j);
ctx->felem_sub(y_out, y_out, s1j);
ctx->felem_sub(y_out, y_out, s1j);
cmovznz(x_out, ctx->felem_num_limbs, z1nz, x2, x_out);
cmovznz(y_out, ctx->felem_num_limbs, z1nz, y2, y_out);
cmovznz(z_out, ctx->felem_num_limbs, z1nz, z2, z_out);
cmovznz(x3, ctx->felem_num_limbs, z2nz, x1, x_out);
cmovznz(y3, ctx->felem_num_limbs, z2nz, y1, y_out);
cmovznz(z3, ctx->felem_num_limbs, z2nz, z1, z_out);
}
static int16_t get_bit(const EC_SCALAR *in, size_t in_bit_size, size_t i) {
if (i >= in_bit_size) {
return 0;
}
#if defined(OPENSSL_64_BIT)
assert(sizeof(BN_ULONG) == 8);
return (in->words[i >> 6] >> (i & 63)) & 1;
#else
assert(sizeof(BN_ULONG) == 4);
return (in->words[i >> 5] >> (i & 31)) & 1;
#endif
}
#define DIV_AND_CEIL(a, b) ((a + b - 1) / b)
// Compute "regular" wNAF representation of a scalar, see
// Joye, Tunstall, "Exponent Recoding and Regular Exponentiation Algorithms",
// AfricaCrypt 2009, Alg 6.
// It forces an odd scalar and outputs digits in
// {\pm 1, \pm 3, \pm 5, \pm 7, \pm 9, ...}
// i.e. signed odd digits with _no zeroes_ -- that makes it "regular".
void scalar_rwnaf(int16_t *out, size_t window_size,
const EC_SCALAR *scalar, size_t scalar_bit_size) {
assert(window_size < 14);
// The assert above ensures this works correctly.
const int16_t window_mask = (1 << (window_size + 1)) - 1;
int16_t window = (int16_t)(scalar->words[0] & (BN_ULONG)window_mask);
window |= 1;
const size_t num_windows = DIV_AND_CEIL(scalar_bit_size, window_size);
for (size_t i = 0; i < num_windows - 1; i++) {
int16_t d = (window & window_mask) - (int16_t)(1 << window_size);
out[i] = d;
window = (window - d) >> window_size;
for (size_t j = 1; j <= window_size; j++) {
window += get_bit(scalar, scalar_bit_size, (i + 1) * window_size + j) << j;
}
}
out[num_windows - 1] = window;
}
// The window size for scalar multiplication is hard coded for now.
#define SCALAR_MUL_WINDOW_SIZE (5)
#define SCALAR_MUL_TABLE_NUM_POINTS (1 << (SCALAR_MUL_WINDOW_SIZE - 1))
// Generate table of multiples of the input point P = (x_in, y_in, z_in):
// table <-- [2i + 1]P for i in [0, 15].
void generate_table(const ec_nistp_meth *ctx,
ec_nistp_felem_limb *table,
ec_nistp_felem_limb *x_in,
ec_nistp_felem_limb *y_in,
ec_nistp_felem_limb *z_in)
{
const size_t felem_num_limbs = ctx->felem_num_limbs;
const size_t felem_num_bytes = felem_num_limbs * sizeof(ec_nistp_felem_limb);
// table[0] <-- P.
OPENSSL_memcpy(&table[felem_num_limbs * 0], x_in, felem_num_bytes);
OPENSSL_memcpy(&table[felem_num_limbs * 1], y_in, felem_num_bytes);
OPENSSL_memcpy(&table[felem_num_limbs * 2], z_in, felem_num_bytes);
// Compute 2P.
ec_nistp_felem x_in_dbl, y_in_dbl, z_in_dbl;
ctx->point_dbl(x_in_dbl, y_in_dbl, z_in_dbl,
&table[0 * felem_num_limbs],
&table[1 * felem_num_limbs],
&table[2 * felem_num_limbs]);
// Compute the rest of the table.
for (size_t i = 1; i < SCALAR_MUL_TABLE_NUM_POINTS; i++) {
// Just getting pointers to i-th and (i-1)-th point in the table.
ec_nistp_felem_limb *point_i = &table[i * 3 * felem_num_limbs];
ec_nistp_felem_limb *point_im1 = &table[(i - 1) * 3 * felem_num_limbs];
// table[i] <-- table[i - 1] + 2P
ctx->point_add(&point_i[0 * felem_num_limbs],
&point_i[1 * felem_num_limbs],
&point_i[2 * felem_num_limbs],
&point_im1[0 * felem_num_limbs],
&point_im1[1 * felem_num_limbs],
&point_im1[2 * felem_num_limbs],
0, x_in_dbl, y_in_dbl, z_in_dbl);
}
}