1 | /*
|
---|
2 | * Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
|
---|
3 | *
|
---|
4 | * Licensed under the Apache License 2.0 (the "License"). You may not use
|
---|
5 | * this file except in compliance with the License. You can obtain a copy
|
---|
6 | * in the file LICENSE in the source distribution or at
|
---|
7 | * https://www.openssl.org/source/license.html
|
---|
8 | */
|
---|
9 |
|
---|
10 | /* Copyright 2023 IBM Corp.
|
---|
11 | *
|
---|
12 | * Licensed under the Apache License, Version 2.0 (the "License");
|
---|
13 | *
|
---|
14 | * you may not use this file except in compliance with the License.
|
---|
15 | * You may obtain a copy of the License at
|
---|
16 | *
|
---|
17 | * http://www.apache.org/licenses/LICENSE-2.0
|
---|
18 | *
|
---|
19 | * Unless required by applicable law or agreed to in writing, software
|
---|
20 | * distributed under the License is distributed on an "AS IS" BASIS,
|
---|
21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
---|
22 | * See the License for the specific language governing permissions and
|
---|
23 | * limitations under the License.
|
---|
24 | */
|
---|
25 |
|
---|
26 | /*
|
---|
27 | * Designed for 56-bit limbs by Rohan McLure <[email protected]>.
|
---|
28 | * The layout is based on that of ecp_nistp{224,521}.c, allowing even for asm
|
---|
29 | * acceleration of felem_{square,mul} as supported in these files.
|
---|
30 | */
|
---|
31 |
|
---|
32 | #include <openssl/e_os2.h>
|
---|
33 |
|
---|
34 | #include <string.h>
|
---|
35 | #include <openssl/err.h>
|
---|
36 | #include "ec_local.h"
|
---|
37 |
|
---|
38 | #include "internal/numbers.h"
|
---|
39 |
|
---|
40 | #ifndef INT128_MAX
|
---|
41 | # error "Your compiler doesn't appear to support 128-bit integer types"
|
---|
42 | #endif
|
---|
43 |
|
---|
44 | typedef uint8_t u8;
|
---|
45 | typedef uint64_t u64;
|
---|
46 |
|
---|
47 | /*
|
---|
48 | * The underlying field. P384 operates over GF(2^384-2^128-2^96+2^32-1). We
|
---|
49 | * can serialize an element of this field into 48 bytes. We call this an
|
---|
50 | * felem_bytearray.
|
---|
51 | */
|
---|
52 |
|
---|
53 | typedef u8 felem_bytearray[48];
|
---|
54 |
|
---|
55 | /*
|
---|
56 | * These are the parameters of P384, taken from FIPS 186-3, section D.1.2.4.
|
---|
57 | * These values are big-endian.
|
---|
58 | */
|
---|
59 | static const felem_bytearray nistp384_curve_params[5] = {
|
---|
60 | {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* p */
|
---|
61 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
---|
62 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
|
---|
63 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF},
|
---|
64 | {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* a = -3 */
|
---|
65 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
|
---|
66 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF,
|
---|
67 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFC},
|
---|
68 | {0xB3, 0x31, 0x2F, 0xA7, 0xE2, 0x3E, 0xE7, 0xE4, 0x98, 0x8E, 0x05, 0x6B, /* b */
|
---|
69 | 0xE3, 0xF8, 0x2D, 0x19, 0x18, 0x1D, 0x9C, 0x6E, 0xFE, 0x81, 0x41, 0x12,
|
---|
70 | 0x03, 0x14, 0x08, 0x8F, 0x50, 0x13, 0x87, 0x5A, 0xC6, 0x56, 0x39, 0x8D,
|
---|
71 | 0x8A, 0x2E, 0xD1, 0x9D, 0x2A, 0x85, 0xC8, 0xED, 0xD3, 0xEC, 0x2A, 0xEF},
|
---|
72 | {0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37, 0x8E, 0xB1, 0xC7, 0x1E, /* x */
|
---|
73 | 0xF3, 0x20, 0xAD, 0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98,
|
---|
74 | 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38, 0x55, 0x02, 0xF2, 0x5D,
|
---|
75 | 0xBF, 0x55, 0x29, 0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7},
|
---|
76 | {0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F, 0x5D, 0x9E, 0x98, 0xBF, /* y */
|
---|
77 | 0x92, 0x92, 0xDC, 0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C,
|
---|
78 | 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0, 0x0A, 0x60, 0xB1, 0xCE,
|
---|
79 | 0x1D, 0x7E, 0x81, 0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F},
|
---|
80 | };
|
---|
81 |
|
---|
82 | /*-
|
---|
83 | * The representation of field elements.
|
---|
84 | * ------------------------------------
|
---|
85 | *
|
---|
86 | * We represent field elements with seven values. These values are either 64 or
|
---|
87 | * 128 bits and the field element represented is:
|
---|
88 | * v[0]*2^0 + v[1]*2^56 + v[2]*2^112 + ... + v[6]*2^336 (mod p)
|
---|
89 | * Each of the seven values is called a 'limb'. Since the limbs are spaced only
|
---|
90 | * 56 bits apart, but are greater than 56 bits in length, the most significant
|
---|
91 | * bits of each limb overlap with the least significant bits of the next
|
---|
92 | *
|
---|
93 | * This representation is considered to be 'redundant' in the sense that
|
---|
94 | * intermediate values can each contain more than a 56-bit value in each limb.
|
---|
95 | * Reduction causes all but the final limb to be reduced to contain a value less
|
---|
96 | * than 2^56, with the final value represented allowed to be larger than 2^384,
|
---|
97 | * inasmuch as we can be sure that arithmetic overflow remains impossible. The
|
---|
98 | * reduced value must of course be congruent to the unreduced value.
|
---|
99 | *
|
---|
100 | * A field element with 64-bit limbs is an 'felem'. One with 128-bit limbs is a
|
---|
101 | * 'widefelem', featuring enough bits to store the result of a multiplication
|
---|
102 | * and even some further arithmetic without need for immediate reduction.
|
---|
103 | */
|
---|
104 |
|
---|
105 | #define NLIMBS 7
|
---|
106 |
|
---|
107 | typedef uint64_t limb;
|
---|
108 | typedef uint128_t widelimb;
|
---|
109 | typedef limb limb_aX __attribute((__aligned__(1)));
|
---|
110 | typedef limb felem[NLIMBS];
|
---|
111 | typedef widelimb widefelem[2*NLIMBS-1];
|
---|
112 |
|
---|
113 | static const limb bottom56bits = 0xffffffffffffff;
|
---|
114 |
|
---|
115 | /* Helper functions (de)serialising reduced field elements in little endian */
|
---|
116 | static void bin48_to_felem(felem out, const u8 in[48])
|
---|
117 | {
|
---|
118 | memset(out, 0, 56);
|
---|
119 | out[0] = (*((limb *) & in[0])) & bottom56bits;
|
---|
120 | out[1] = (*((limb_aX *) & in[7])) & bottom56bits;
|
---|
121 | out[2] = (*((limb_aX *) & in[14])) & bottom56bits;
|
---|
122 | out[3] = (*((limb_aX *) & in[21])) & bottom56bits;
|
---|
123 | out[4] = (*((limb_aX *) & in[28])) & bottom56bits;
|
---|
124 | out[5] = (*((limb_aX *) & in[35])) & bottom56bits;
|
---|
125 | memmove(&out[6], &in[42], 6);
|
---|
126 | }
|
---|
127 |
|
---|
128 | static void felem_to_bin48(u8 out[48], const felem in)
|
---|
129 | {
|
---|
130 | memset(out, 0, 48);
|
---|
131 | (*((limb *) & out[0])) |= (in[0] & bottom56bits);
|
---|
132 | (*((limb_aX *) & out[7])) |= (in[1] & bottom56bits);
|
---|
133 | (*((limb_aX *) & out[14])) |= (in[2] & bottom56bits);
|
---|
134 | (*((limb_aX *) & out[21])) |= (in[3] & bottom56bits);
|
---|
135 | (*((limb_aX *) & out[28])) |= (in[4] & bottom56bits);
|
---|
136 | (*((limb_aX *) & out[35])) |= (in[5] & bottom56bits);
|
---|
137 | memmove(&out[42], &in[6], 6);
|
---|
138 | }
|
---|
139 |
|
---|
140 | /* BN_to_felem converts an OpenSSL BIGNUM into an felem */
|
---|
141 | static int BN_to_felem(felem out, const BIGNUM *bn)
|
---|
142 | {
|
---|
143 | felem_bytearray b_out;
|
---|
144 | int num_bytes;
|
---|
145 |
|
---|
146 | if (BN_is_negative(bn)) {
|
---|
147 | ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
|
---|
148 | return 0;
|
---|
149 | }
|
---|
150 | num_bytes = BN_bn2lebinpad(bn, b_out, sizeof(b_out));
|
---|
151 | if (num_bytes < 0) {
|
---|
152 | ERR_raise(ERR_LIB_EC, EC_R_BIGNUM_OUT_OF_RANGE);
|
---|
153 | return 0;
|
---|
154 | }
|
---|
155 | bin48_to_felem(out, b_out);
|
---|
156 | return 1;
|
---|
157 | }
|
---|
158 |
|
---|
159 | /* felem_to_BN converts an felem into an OpenSSL BIGNUM */
|
---|
160 | static BIGNUM *felem_to_BN(BIGNUM *out, const felem in)
|
---|
161 | {
|
---|
162 | felem_bytearray b_out;
|
---|
163 |
|
---|
164 | felem_to_bin48(b_out, in);
|
---|
165 | return BN_lebin2bn(b_out, sizeof(b_out), out);
|
---|
166 | }
|
---|
167 |
|
---|
168 | /*-
|
---|
169 | * Field operations
|
---|
170 | * ----------------
|
---|
171 | */
|
---|
172 |
|
---|
173 | static void felem_one(felem out)
|
---|
174 | {
|
---|
175 | out[0] = 1;
|
---|
176 | memset(&out[1], 0, sizeof(limb) * (NLIMBS-1));
|
---|
177 | }
|
---|
178 |
|
---|
179 | static void felem_assign(felem out, const felem in)
|
---|
180 | {
|
---|
181 | memcpy(out, in, sizeof(felem));
|
---|
182 | }
|
---|
183 |
|
---|
184 | /* felem_sum64 sets out = out + in. */
|
---|
185 | static void felem_sum64(felem out, const felem in)
|
---|
186 | {
|
---|
187 | unsigned int i;
|
---|
188 |
|
---|
189 | for (i = 0; i < NLIMBS; i++)
|
---|
190 | out[i] += in[i];
|
---|
191 | }
|
---|
192 |
|
---|
193 | /* felem_scalar sets out = in * scalar */
|
---|
194 | static void felem_scalar(felem out, const felem in, limb scalar)
|
---|
195 | {
|
---|
196 | unsigned int i;
|
---|
197 |
|
---|
198 | for (i = 0; i < NLIMBS; i++)
|
---|
199 | out[i] = in[i] * scalar;
|
---|
200 | }
|
---|
201 |
|
---|
202 | /* felem_scalar64 sets out = out * scalar */
|
---|
203 | static void felem_scalar64(felem out, limb scalar)
|
---|
204 | {
|
---|
205 | unsigned int i;
|
---|
206 |
|
---|
207 | for (i = 0; i < NLIMBS; i++)
|
---|
208 | out[i] *= scalar;
|
---|
209 | }
|
---|
210 |
|
---|
211 | /* felem_scalar128 sets out = out * scalar */
|
---|
212 | static void felem_scalar128(widefelem out, limb scalar)
|
---|
213 | {
|
---|
214 | unsigned int i;
|
---|
215 |
|
---|
216 | for (i = 0; i < 2*NLIMBS-1; i++)
|
---|
217 | out[i] *= scalar;
|
---|
218 | }
|
---|
219 |
|
---|
220 | /*-
|
---|
221 | * felem_neg sets |out| to |-in|
|
---|
222 | * On entry:
|
---|
223 | * in[i] < 2^60 - 2^29
|
---|
224 | * On exit:
|
---|
225 | * out[i] < 2^60
|
---|
226 | */
|
---|
227 | static void felem_neg(felem out, const felem in)
|
---|
228 | {
|
---|
229 | /*
|
---|
230 | * In order to prevent underflow, we add a multiple of p before subtracting.
|
---|
231 | * Use telescopic sums to represent 2^12 * p redundantly with each limb
|
---|
232 | * of the form 2^60 + ...
|
---|
233 | */
|
---|
234 | static const limb two60m52m4 = (((limb) 1) << 60)
|
---|
235 | - (((limb) 1) << 52)
|
---|
236 | - (((limb) 1) << 4);
|
---|
237 | static const limb two60p44m12 = (((limb) 1) << 60)
|
---|
238 | + (((limb) 1) << 44)
|
---|
239 | - (((limb) 1) << 12);
|
---|
240 | static const limb two60m28m4 = (((limb) 1) << 60)
|
---|
241 | - (((limb) 1) << 28)
|
---|
242 | - (((limb) 1) << 4);
|
---|
243 | static const limb two60m4 = (((limb) 1) << 60)
|
---|
244 | - (((limb) 1) << 4);
|
---|
245 |
|
---|
246 | out[0] = two60p44m12 - in[0];
|
---|
247 | out[1] = two60m52m4 - in[1];
|
---|
248 | out[2] = two60m28m4 - in[2];
|
---|
249 | out[3] = two60m4 - in[3];
|
---|
250 | out[4] = two60m4 - in[4];
|
---|
251 | out[5] = two60m4 - in[5];
|
---|
252 | out[6] = two60m4 - in[6];
|
---|
253 | }
|
---|
254 |
|
---|
255 | /*-
|
---|
256 | * felem_diff64 subtracts |in| from |out|
|
---|
257 | * On entry:
|
---|
258 | * in[i] < 2^60 - 2^52 - 2^4
|
---|
259 | * On exit:
|
---|
260 | * out[i] < out_orig[i] + 2^60 + 2^44
|
---|
261 | */
|
---|
262 | static void felem_diff64(felem out, const felem in)
|
---|
263 | {
|
---|
264 | /*
|
---|
265 | * In order to prevent underflow, we add a multiple of p before subtracting.
|
---|
266 | * Use telescopic sums to represent 2^12 * p redundantly with each limb
|
---|
267 | * of the form 2^60 + ...
|
---|
268 | */
|
---|
269 |
|
---|
270 | static const limb two60m52m4 = (((limb) 1) << 60)
|
---|
271 | - (((limb) 1) << 52)
|
---|
272 | - (((limb) 1) << 4);
|
---|
273 | static const limb two60p44m12 = (((limb) 1) << 60)
|
---|
274 | + (((limb) 1) << 44)
|
---|
275 | - (((limb) 1) << 12);
|
---|
276 | static const limb two60m28m4 = (((limb) 1) << 60)
|
---|
277 | - (((limb) 1) << 28)
|
---|
278 | - (((limb) 1) << 4);
|
---|
279 | static const limb two60m4 = (((limb) 1) << 60)
|
---|
280 | - (((limb) 1) << 4);
|
---|
281 |
|
---|
282 | out[0] += two60p44m12 - in[0];
|
---|
283 | out[1] += two60m52m4 - in[1];
|
---|
284 | out[2] += two60m28m4 - in[2];
|
---|
285 | out[3] += two60m4 - in[3];
|
---|
286 | out[4] += two60m4 - in[4];
|
---|
287 | out[5] += two60m4 - in[5];
|
---|
288 | out[6] += two60m4 - in[6];
|
---|
289 | }
|
---|
290 |
|
---|
291 | /*
|
---|
292 | * in[i] < 2^63
|
---|
293 | * out[i] < out_orig[i] + 2^64 + 2^48
|
---|
294 | */
|
---|
295 | static void felem_diff_128_64(widefelem out, const felem in)
|
---|
296 | {
|
---|
297 | /*
|
---|
298 | * In order to prevent underflow, we add a multiple of p before subtracting.
|
---|
299 | * Use telescopic sums to represent 2^16 * p redundantly with each limb
|
---|
300 | * of the form 2^64 + ...
|
---|
301 | */
|
---|
302 |
|
---|
303 | static const widelimb two64m56m8 = (((widelimb) 1) << 64)
|
---|
304 | - (((widelimb) 1) << 56)
|
---|
305 | - (((widelimb) 1) << 8);
|
---|
306 | static const widelimb two64m32m8 = (((widelimb) 1) << 64)
|
---|
307 | - (((widelimb) 1) << 32)
|
---|
308 | - (((widelimb) 1) << 8);
|
---|
309 | static const widelimb two64m8 = (((widelimb) 1) << 64)
|
---|
310 | - (((widelimb) 1) << 8);
|
---|
311 | static const widelimb two64p48m16 = (((widelimb) 1) << 64)
|
---|
312 | + (((widelimb) 1) << 48)
|
---|
313 | - (((widelimb) 1) << 16);
|
---|
314 | unsigned int i;
|
---|
315 |
|
---|
316 | out[0] += two64p48m16;
|
---|
317 | out[1] += two64m56m8;
|
---|
318 | out[2] += two64m32m8;
|
---|
319 | out[3] += two64m8;
|
---|
320 | out[4] += two64m8;
|
---|
321 | out[5] += two64m8;
|
---|
322 | out[6] += two64m8;
|
---|
323 |
|
---|
324 | for (i = 0; i < NLIMBS; i++)
|
---|
325 | out[i] -= in[i];
|
---|
326 | }
|
---|
327 |
|
---|
328 | /*
|
---|
329 | * in[i] < 2^127 - 2^119 - 2^71
|
---|
330 | * out[i] < out_orig[i] + 2^127 + 2^111
|
---|
331 | */
|
---|
332 | static void felem_diff128(widefelem out, const widefelem in)
|
---|
333 | {
|
---|
334 | /*
|
---|
335 | * In order to prevent underflow, we add a multiple of p before subtracting.
|
---|
336 | * Use telescopic sums to represent 2^415 * p redundantly with each limb
|
---|
337 | * of the form 2^127 + ...
|
---|
338 | */
|
---|
339 |
|
---|
340 | static const widelimb two127 = ((widelimb) 1) << 127;
|
---|
341 | static const widelimb two127m71 = (((widelimb) 1) << 127)
|
---|
342 | - (((widelimb) 1) << 71);
|
---|
343 | static const widelimb two127p111m79m71 = (((widelimb) 1) << 127)
|
---|
344 | + (((widelimb) 1) << 111)
|
---|
345 | - (((widelimb) 1) << 79)
|
---|
346 | - (((widelimb) 1) << 71);
|
---|
347 | static const widelimb two127m119m71 = (((widelimb) 1) << 127)
|
---|
348 | - (((widelimb) 1) << 119)
|
---|
349 | - (((widelimb) 1) << 71);
|
---|
350 | static const widelimb two127m95m71 = (((widelimb) 1) << 127)
|
---|
351 | - (((widelimb) 1) << 95)
|
---|
352 | - (((widelimb) 1) << 71);
|
---|
353 | unsigned int i;
|
---|
354 |
|
---|
355 | out[0] += two127;
|
---|
356 | out[1] += two127m71;
|
---|
357 | out[2] += two127m71;
|
---|
358 | out[3] += two127m71;
|
---|
359 | out[4] += two127m71;
|
---|
360 | out[5] += two127m71;
|
---|
361 | out[6] += two127p111m79m71;
|
---|
362 | out[7] += two127m119m71;
|
---|
363 | out[8] += two127m95m71;
|
---|
364 | out[9] += two127m71;
|
---|
365 | out[10] += two127m71;
|
---|
366 | out[11] += two127m71;
|
---|
367 | out[12] += two127m71;
|
---|
368 |
|
---|
369 | for (i = 0; i < 2*NLIMBS-1; i++)
|
---|
370 | out[i] -= in[i];
|
---|
371 | }
|
---|
372 |
|
---|
373 | static void felem_square_ref(widefelem out, const felem in)
|
---|
374 | {
|
---|
375 | felem inx2;
|
---|
376 | felem_scalar(inx2, in, 2);
|
---|
377 |
|
---|
378 | out[0] = ((uint128_t) in[0]) * in[0];
|
---|
379 |
|
---|
380 | out[1] = ((uint128_t) in[0]) * inx2[1];
|
---|
381 |
|
---|
382 | out[2] = ((uint128_t) in[0]) * inx2[2]
|
---|
383 | + ((uint128_t) in[1]) * in[1];
|
---|
384 |
|
---|
385 | out[3] = ((uint128_t) in[0]) * inx2[3]
|
---|
386 | + ((uint128_t) in[1]) * inx2[2];
|
---|
387 |
|
---|
388 | out[4] = ((uint128_t) in[0]) * inx2[4]
|
---|
389 | + ((uint128_t) in[1]) * inx2[3]
|
---|
390 | + ((uint128_t) in[2]) * in[2];
|
---|
391 |
|
---|
392 | out[5] = ((uint128_t) in[0]) * inx2[5]
|
---|
393 | + ((uint128_t) in[1]) * inx2[4]
|
---|
394 | + ((uint128_t) in[2]) * inx2[3];
|
---|
395 |
|
---|
396 | out[6] = ((uint128_t) in[0]) * inx2[6]
|
---|
397 | + ((uint128_t) in[1]) * inx2[5]
|
---|
398 | + ((uint128_t) in[2]) * inx2[4]
|
---|
399 | + ((uint128_t) in[3]) * in[3];
|
---|
400 |
|
---|
401 | out[7] = ((uint128_t) in[1]) * inx2[6]
|
---|
402 | + ((uint128_t) in[2]) * inx2[5]
|
---|
403 | + ((uint128_t) in[3]) * inx2[4];
|
---|
404 |
|
---|
405 | out[8] = ((uint128_t) in[2]) * inx2[6]
|
---|
406 | + ((uint128_t) in[3]) * inx2[5]
|
---|
407 | + ((uint128_t) in[4]) * in[4];
|
---|
408 |
|
---|
409 | out[9] = ((uint128_t) in[3]) * inx2[6]
|
---|
410 | + ((uint128_t) in[4]) * inx2[5];
|
---|
411 |
|
---|
412 | out[10] = ((uint128_t) in[4]) * inx2[6]
|
---|
413 | + ((uint128_t) in[5]) * in[5];
|
---|
414 |
|
---|
415 | out[11] = ((uint128_t) in[5]) * inx2[6];
|
---|
416 |
|
---|
417 | out[12] = ((uint128_t) in[6]) * in[6];
|
---|
418 | }
|
---|
419 |
|
---|
420 | static void felem_mul_ref(widefelem out, const felem in1, const felem in2)
|
---|
421 | {
|
---|
422 | out[0] = ((uint128_t) in1[0]) * in2[0];
|
---|
423 |
|
---|
424 | out[1] = ((uint128_t) in1[0]) * in2[1]
|
---|
425 | + ((uint128_t) in1[1]) * in2[0];
|
---|
426 |
|
---|
427 | out[2] = ((uint128_t) in1[0]) * in2[2]
|
---|
428 | + ((uint128_t) in1[1]) * in2[1]
|
---|
429 | + ((uint128_t) in1[2]) * in2[0];
|
---|
430 |
|
---|
431 | out[3] = ((uint128_t) in1[0]) * in2[3]
|
---|
432 | + ((uint128_t) in1[1]) * in2[2]
|
---|
433 | + ((uint128_t) in1[2]) * in2[1]
|
---|
434 | + ((uint128_t) in1[3]) * in2[0];
|
---|
435 |
|
---|
436 | out[4] = ((uint128_t) in1[0]) * in2[4]
|
---|
437 | + ((uint128_t) in1[1]) * in2[3]
|
---|
438 | + ((uint128_t) in1[2]) * in2[2]
|
---|
439 | + ((uint128_t) in1[3]) * in2[1]
|
---|
440 | + ((uint128_t) in1[4]) * in2[0];
|
---|
441 |
|
---|
442 | out[5] = ((uint128_t) in1[0]) * in2[5]
|
---|
443 | + ((uint128_t) in1[1]) * in2[4]
|
---|
444 | + ((uint128_t) in1[2]) * in2[3]
|
---|
445 | + ((uint128_t) in1[3]) * in2[2]
|
---|
446 | + ((uint128_t) in1[4]) * in2[1]
|
---|
447 | + ((uint128_t) in1[5]) * in2[0];
|
---|
448 |
|
---|
449 | out[6] = ((uint128_t) in1[0]) * in2[6]
|
---|
450 | + ((uint128_t) in1[1]) * in2[5]
|
---|
451 | + ((uint128_t) in1[2]) * in2[4]
|
---|
452 | + ((uint128_t) in1[3]) * in2[3]
|
---|
453 | + ((uint128_t) in1[4]) * in2[2]
|
---|
454 | + ((uint128_t) in1[5]) * in2[1]
|
---|
455 | + ((uint128_t) in1[6]) * in2[0];
|
---|
456 |
|
---|
457 | out[7] = ((uint128_t) in1[1]) * in2[6]
|
---|
458 | + ((uint128_t) in1[2]) * in2[5]
|
---|
459 | + ((uint128_t) in1[3]) * in2[4]
|
---|
460 | + ((uint128_t) in1[4]) * in2[3]
|
---|
461 | + ((uint128_t) in1[5]) * in2[2]
|
---|
462 | + ((uint128_t) in1[6]) * in2[1];
|
---|
463 |
|
---|
464 | out[8] = ((uint128_t) in1[2]) * in2[6]
|
---|
465 | + ((uint128_t) in1[3]) * in2[5]
|
---|
466 | + ((uint128_t) in1[4]) * in2[4]
|
---|
467 | + ((uint128_t) in1[5]) * in2[3]
|
---|
468 | + ((uint128_t) in1[6]) * in2[2];
|
---|
469 |
|
---|
470 | out[9] = ((uint128_t) in1[3]) * in2[6]
|
---|
471 | + ((uint128_t) in1[4]) * in2[5]
|
---|
472 | + ((uint128_t) in1[5]) * in2[4]
|
---|
473 | + ((uint128_t) in1[6]) * in2[3];
|
---|
474 |
|
---|
475 | out[10] = ((uint128_t) in1[4]) * in2[6]
|
---|
476 | + ((uint128_t) in1[5]) * in2[5]
|
---|
477 | + ((uint128_t) in1[6]) * in2[4];
|
---|
478 |
|
---|
479 | out[11] = ((uint128_t) in1[5]) * in2[6]
|
---|
480 | + ((uint128_t) in1[6]) * in2[5];
|
---|
481 |
|
---|
482 | out[12] = ((uint128_t) in1[6]) * in2[6];
|
---|
483 | }
|
---|
484 |
|
---|
485 | /*-
|
---|
486 | * Reduce thirteen 128-bit coefficients to seven 64-bit coefficients.
|
---|
487 | * in[i] < 2^128 - 2^125
|
---|
488 | * out[i] < 2^56 for i < 6,
|
---|
489 | * out[6] <= 2^48
|
---|
490 | *
|
---|
491 | * The technique in use here stems from the format of the prime modulus:
|
---|
492 | * P384 = 2^384 - delta
|
---|
493 | *
|
---|
494 | * Thus we can reduce numbers of the form (X + 2^384 * Y) by substituting
|
---|
495 | * them with (X + delta Y), with delta = 2^128 + 2^96 + (-2^32 + 1). These
|
---|
496 | * coefficients are still quite large, and so we repeatedly apply this
|
---|
497 | * technique on high-order bits in order to guarantee the desired bounds on
|
---|
498 | * the size of our output.
|
---|
499 | *
|
---|
500 | * The three phases of elimination are as follows:
|
---|
501 | * [1]: Y = 2^120 (in[12] | in[11] | in[10] | in[9])
|
---|
502 | * [2]: Y = 2^8 (acc[8] | acc[7])
|
---|
503 | * [3]: Y = 2^48 (acc[6] >> 48)
|
---|
504 | * (Where a | b | c | d = (2^56)^3 a + (2^56)^2 b + (2^56) c + d)
|
---|
505 | */
|
---|
506 | static void felem_reduce(felem out, const widefelem in)
|
---|
507 | {
|
---|
508 | /*
|
---|
509 | * In order to prevent underflow, we add a multiple of p before subtracting.
|
---|
510 | * Use telescopic sums to represent 2^76 * p redundantly with each limb
|
---|
511 | * of the form 2^124 + ...
|
---|
512 | */
|
---|
513 | static const widelimb two124m68 = (((widelimb) 1) << 124)
|
---|
514 | - (((widelimb) 1) << 68);
|
---|
515 | static const widelimb two124m116m68 = (((widelimb) 1) << 124)
|
---|
516 | - (((widelimb) 1) << 116)
|
---|
517 | - (((widelimb) 1) << 68);
|
---|
518 | static const widelimb two124p108m76 = (((widelimb) 1) << 124)
|
---|
519 | + (((widelimb) 1) << 108)
|
---|
520 | - (((widelimb) 1) << 76);
|
---|
521 | static const widelimb two124m92m68 = (((widelimb) 1) << 124)
|
---|
522 | - (((widelimb) 1) << 92)
|
---|
523 | - (((widelimb) 1) << 68);
|
---|
524 | widelimb temp, acc[9];
|
---|
525 | unsigned int i;
|
---|
526 |
|
---|
527 | memcpy(acc, in, sizeof(widelimb) * 9);
|
---|
528 |
|
---|
529 | acc[0] += two124p108m76;
|
---|
530 | acc[1] += two124m116m68;
|
---|
531 | acc[2] += two124m92m68;
|
---|
532 | acc[3] += two124m68;
|
---|
533 | acc[4] += two124m68;
|
---|
534 | acc[5] += two124m68;
|
---|
535 | acc[6] += two124m68;
|
---|
536 |
|
---|
537 | /* [1]: Eliminate in[9], ..., in[12] */
|
---|
538 | acc[8] += in[12] >> 32;
|
---|
539 | acc[7] += (in[12] & 0xffffffff) << 24;
|
---|
540 | acc[7] += in[12] >> 8;
|
---|
541 | acc[6] += (in[12] & 0xff) << 48;
|
---|
542 | acc[6] -= in[12] >> 16;
|
---|
543 | acc[5] -= (in[12] & 0xffff) << 40;
|
---|
544 | acc[6] += in[12] >> 48;
|
---|
545 | acc[5] += (in[12] & 0xffffffffffff) << 8;
|
---|
546 |
|
---|
547 | acc[7] += in[11] >> 32;
|
---|
548 | acc[6] += (in[11] & 0xffffffff) << 24;
|
---|
549 | acc[6] += in[11] >> 8;
|
---|
550 | acc[5] += (in[11] & 0xff) << 48;
|
---|
551 | acc[5] -= in[11] >> 16;
|
---|
552 | acc[4] -= (in[11] & 0xffff) << 40;
|
---|
553 | acc[5] += in[11] >> 48;
|
---|
554 | acc[4] += (in[11] & 0xffffffffffff) << 8;
|
---|
555 |
|
---|
556 | acc[6] += in[10] >> 32;
|
---|
557 | acc[5] += (in[10] & 0xffffffff) << 24;
|
---|
558 | acc[5] += in[10] >> 8;
|
---|
559 | acc[4] += (in[10] & 0xff) << 48;
|
---|
560 | acc[4] -= in[10] >> 16;
|
---|
561 | acc[3] -= (in[10] & 0xffff) << 40;
|
---|
562 | acc[4] += in[10] >> 48;
|
---|
563 | acc[3] += (in[10] & 0xffffffffffff) << 8;
|
---|
564 |
|
---|
565 | acc[5] += in[9] >> 32;
|
---|
566 | acc[4] += (in[9] & 0xffffffff) << 24;
|
---|
567 | acc[4] += in[9] >> 8;
|
---|
568 | acc[3] += (in[9] & 0xff) << 48;
|
---|
569 | acc[3] -= in[9] >> 16;
|
---|
570 | acc[2] -= (in[9] & 0xffff) << 40;
|
---|
571 | acc[3] += in[9] >> 48;
|
---|
572 | acc[2] += (in[9] & 0xffffffffffff) << 8;
|
---|
573 |
|
---|
574 | /*
|
---|
575 | * [2]: Eliminate acc[7], acc[8], that is the 7 and eighth limbs, as
|
---|
576 | * well as the contributions made from eliminating higher limbs.
|
---|
577 | * acc[7] < in[7] + 2^120 + 2^56 < in[7] + 2^121
|
---|
578 | * acc[8] < in[8] + 2^96
|
---|
579 | */
|
---|
580 | acc[4] += acc[8] >> 32;
|
---|
581 | acc[3] += (acc[8] & 0xffffffff) << 24;
|
---|
582 | acc[3] += acc[8] >> 8;
|
---|
583 | acc[2] += (acc[8] & 0xff) << 48;
|
---|
584 | acc[2] -= acc[8] >> 16;
|
---|
585 | acc[1] -= (acc[8] & 0xffff) << 40;
|
---|
586 | acc[2] += acc[8] >> 48;
|
---|
587 | acc[1] += (acc[8] & 0xffffffffffff) << 8;
|
---|
588 |
|
---|
589 | acc[3] += acc[7] >> 32;
|
---|
590 | acc[2] += (acc[7] & 0xffffffff) << 24;
|
---|
591 | acc[2] += acc[7] >> 8;
|
---|
592 | acc[1] += (acc[7] & 0xff) << 48;
|
---|
593 | acc[1] -= acc[7] >> 16;
|
---|
594 | acc[0] -= (acc[7] & 0xffff) << 40;
|
---|
595 | acc[1] += acc[7] >> 48;
|
---|
596 | acc[0] += (acc[7] & 0xffffffffffff) << 8;
|
---|
597 |
|
---|
598 | /*-
|
---|
599 | * acc[k] < in[k] + 2^124 + 2^121
|
---|
600 | * < in[k] + 2^125
|
---|
601 | * < 2^128, for k <= 6
|
---|
602 | */
|
---|
603 |
|
---|
604 | /*
|
---|
605 | * Carry 4 -> 5 -> 6
|
---|
606 | * This has the effect of ensuring that these more significant limbs
|
---|
607 | * will be small in value after eliminating high bits from acc[6].
|
---|
608 | */
|
---|
609 | acc[5] += acc[4] >> 56;
|
---|
610 | acc[4] &= 0x00ffffffffffffff;
|
---|
611 |
|
---|
612 | acc[6] += acc[5] >> 56;
|
---|
613 | acc[5] &= 0x00ffffffffffffff;
|
---|
614 |
|
---|
615 | /*-
|
---|
616 | * acc[6] < in[6] + 2^124 + 2^121 + 2^72 + 2^16
|
---|
617 | * < in[6] + 2^125
|
---|
618 | * < 2^128
|
---|
619 | */
|
---|
620 |
|
---|
621 | /* [3]: Eliminate high bits of acc[6] */
|
---|
622 | temp = acc[6] >> 48;
|
---|
623 | acc[6] &= 0x0000ffffffffffff;
|
---|
624 |
|
---|
625 | /* temp < 2^80 */
|
---|
626 |
|
---|
627 | acc[3] += temp >> 40;
|
---|
628 | acc[2] += (temp & 0xffffffffff) << 16;
|
---|
629 | acc[2] += temp >> 16;
|
---|
630 | acc[1] += (temp & 0xffff) << 40;
|
---|
631 | acc[1] -= temp >> 24;
|
---|
632 | acc[0] -= (temp & 0xffffff) << 32;
|
---|
633 | acc[0] += temp;
|
---|
634 |
|
---|
635 | /*-
|
---|
636 | * acc[k] < acc_old[k] + 2^64 + 2^56
|
---|
637 | * < in[k] + 2^124 + 2^121 + 2^72 + 2^64 + 2^56 + 2^16 , k < 4
|
---|
638 | */
|
---|
639 |
|
---|
640 | /* Carry 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 */
|
---|
641 | acc[1] += acc[0] >> 56; /* acc[1] < acc_old[1] + 2^72 */
|
---|
642 | acc[0] &= 0x00ffffffffffffff;
|
---|
643 |
|
---|
644 | acc[2] += acc[1] >> 56; /* acc[2] < acc_old[2] + 2^72 + 2^16 */
|
---|
645 | acc[1] &= 0x00ffffffffffffff;
|
---|
646 |
|
---|
647 | acc[3] += acc[2] >> 56; /* acc[3] < acc_old[3] + 2^72 + 2^16 */
|
---|
648 | acc[2] &= 0x00ffffffffffffff;
|
---|
649 |
|
---|
650 | /*-
|
---|
651 | * acc[k] < acc_old[k] + 2^72 + 2^16
|
---|
652 | * < in[k] + 2^124 + 2^121 + 2^73 + 2^64 + 2^56 + 2^17
|
---|
653 | * < in[k] + 2^125
|
---|
654 | * < 2^128 , k < 4
|
---|
655 | */
|
---|
656 |
|
---|
657 | acc[4] += acc[3] >> 56; /*-
|
---|
658 | * acc[4] < acc_old[4] + 2^72 + 2^16
|
---|
659 | * < 2^72 + 2^56 + 2^16
|
---|
660 | */
|
---|
661 | acc[3] &= 0x00ffffffffffffff;
|
---|
662 |
|
---|
663 | acc[5] += acc[4] >> 56; /*-
|
---|
664 | * acc[5] < acc_old[5] + 2^16 + 1
|
---|
665 | * < 2^56 + 2^16 + 1
|
---|
666 | */
|
---|
667 | acc[4] &= 0x00ffffffffffffff;
|
---|
668 |
|
---|
669 | acc[6] += acc[5] >> 56; /* acc[6] < 2^48 + 1 <= 2^48 */
|
---|
670 | acc[5] &= 0x00ffffffffffffff;
|
---|
671 |
|
---|
672 | for (i = 0; i < NLIMBS; i++)
|
---|
673 | out[i] = acc[i];
|
---|
674 | }
|
---|
675 |
|
---|
676 | #if defined(ECP_NISTP384_ASM)
|
---|
677 | static void felem_square_wrapper(widefelem out, const felem in);
|
---|
678 | static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2);
|
---|
679 |
|
---|
680 | static void (*felem_square_p)(widefelem out, const felem in) =
|
---|
681 | felem_square_wrapper;
|
---|
682 | static void (*felem_mul_p)(widefelem out, const felem in1, const felem in2) =
|
---|
683 | felem_mul_wrapper;
|
---|
684 |
|
---|
685 | void p384_felem_square(widefelem out, const felem in);
|
---|
686 | void p384_felem_mul(widefelem out, const felem in1, const felem in2);
|
---|
687 |
|
---|
688 | # if defined(_ARCH_PPC64)
|
---|
689 | # include "crypto/ppc_arch.h"
|
---|
690 | # endif
|
---|
691 |
|
---|
692 | static void felem_select(void)
|
---|
693 | {
|
---|
694 | # if defined(_ARCH_PPC64)
|
---|
695 | if ((OPENSSL_ppccap_P & PPC_MADD300) && (OPENSSL_ppccap_P & PPC_ALTIVEC)) {
|
---|
696 | felem_square_p = p384_felem_square;
|
---|
697 | felem_mul_p = p384_felem_mul;
|
---|
698 |
|
---|
699 | return;
|
---|
700 | }
|
---|
701 | # endif
|
---|
702 |
|
---|
703 | /* Default */
|
---|
704 | felem_square_p = felem_square_ref;
|
---|
705 | felem_mul_p = felem_mul_ref;
|
---|
706 | }
|
---|
707 |
|
---|
708 | static void felem_square_wrapper(widefelem out, const felem in)
|
---|
709 | {
|
---|
710 | felem_select();
|
---|
711 | felem_square_p(out, in);
|
---|
712 | }
|
---|
713 |
|
---|
714 | static void felem_mul_wrapper(widefelem out, const felem in1, const felem in2)
|
---|
715 | {
|
---|
716 | felem_select();
|
---|
717 | felem_mul_p(out, in1, in2);
|
---|
718 | }
|
---|
719 |
|
---|
720 | # define felem_square felem_square_p
|
---|
721 | # define felem_mul felem_mul_p
|
---|
722 | #else
|
---|
723 | # define felem_square felem_square_ref
|
---|
724 | # define felem_mul felem_mul_ref
|
---|
725 | #endif
|
---|
726 |
|
---|
727 | static ossl_inline void felem_square_reduce(felem out, const felem in)
|
---|
728 | {
|
---|
729 | widefelem tmp;
|
---|
730 |
|
---|
731 | felem_square(tmp, in);
|
---|
732 | felem_reduce(out, tmp);
|
---|
733 | }
|
---|
734 |
|
---|
735 | static ossl_inline void felem_mul_reduce(felem out, const felem in1, const felem in2)
|
---|
736 | {
|
---|
737 | widefelem tmp;
|
---|
738 |
|
---|
739 | felem_mul(tmp, in1, in2);
|
---|
740 | felem_reduce(out, tmp);
|
---|
741 | }
|
---|
742 |
|
---|
743 | /*-
|
---|
744 | * felem_inv calculates |out| = |in|^{-1}
|
---|
745 | *
|
---|
746 | * Based on Fermat's Little Theorem:
|
---|
747 | * a^p = a (mod p)
|
---|
748 | * a^{p-1} = 1 (mod p)
|
---|
749 | * a^{p-2} = a^{-1} (mod p)
|
---|
750 | */
|
---|
751 | static void felem_inv(felem out, const felem in)
|
---|
752 | {
|
---|
753 | felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6;
|
---|
754 | unsigned int i = 0;
|
---|
755 |
|
---|
756 | felem_square_reduce(ftmp, in); /* 2^1 */
|
---|
757 | felem_mul_reduce(ftmp, ftmp, in); /* 2^1 + 2^0 */
|
---|
758 | felem_assign(ftmp2, ftmp);
|
---|
759 |
|
---|
760 | felem_square_reduce(ftmp, ftmp); /* 2^2 + 2^1 */
|
---|
761 | felem_mul_reduce(ftmp, ftmp, in); /* 2^2 + 2^1 * 2^0 */
|
---|
762 | felem_assign(ftmp3, ftmp);
|
---|
763 |
|
---|
764 | for (i = 0; i < 3; i++)
|
---|
765 | felem_square_reduce(ftmp, ftmp); /* 2^5 + 2^4 + 2^3 */
|
---|
766 | felem_mul_reduce(ftmp, ftmp3, ftmp); /* 2^5 + 2^4 + 2^3 + 2^2 + 2^1 + 2^0 */
|
---|
767 | felem_assign(ftmp4, ftmp);
|
---|
768 |
|
---|
769 | for (i = 0; i < 6; i++)
|
---|
770 | felem_square_reduce(ftmp, ftmp); /* 2^11 + ... + 2^6 */
|
---|
771 | felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^11 + ... + 2^0 */
|
---|
772 |
|
---|
773 | for (i = 0; i < 3; i++)
|
---|
774 | felem_square_reduce(ftmp, ftmp); /* 2^14 + ... + 2^3 */
|
---|
775 | felem_mul_reduce(ftmp, ftmp3, ftmp); /* 2^14 + ... + 2^0 */
|
---|
776 | felem_assign(ftmp5, ftmp);
|
---|
777 |
|
---|
778 | for (i = 0; i < 15; i++)
|
---|
779 | felem_square_reduce(ftmp, ftmp); /* 2^29 + ... + 2^15 */
|
---|
780 | felem_mul_reduce(ftmp, ftmp5, ftmp); /* 2^29 + ... + 2^0 */
|
---|
781 | felem_assign(ftmp6, ftmp);
|
---|
782 |
|
---|
783 | for (i = 0; i < 30; i++)
|
---|
784 | felem_square_reduce(ftmp, ftmp); /* 2^59 + ... + 2^30 */
|
---|
785 | felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^59 + ... + 2^0 */
|
---|
786 | felem_assign(ftmp4, ftmp);
|
---|
787 |
|
---|
788 | for (i = 0; i < 60; i++)
|
---|
789 | felem_square_reduce(ftmp, ftmp); /* 2^119 + ... + 2^60 */
|
---|
790 | felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^119 + ... + 2^0 */
|
---|
791 | felem_assign(ftmp4, ftmp);
|
---|
792 |
|
---|
793 | for (i = 0; i < 120; i++)
|
---|
794 | felem_square_reduce(ftmp, ftmp); /* 2^239 + ... + 2^120 */
|
---|
795 | felem_mul_reduce(ftmp, ftmp4, ftmp); /* 2^239 + ... + 2^0 */
|
---|
796 |
|
---|
797 | for (i = 0; i < 15; i++)
|
---|
798 | felem_square_reduce(ftmp, ftmp); /* 2^254 + ... + 2^15 */
|
---|
799 | felem_mul_reduce(ftmp, ftmp5, ftmp); /* 2^254 + ... + 2^0 */
|
---|
800 |
|
---|
801 | for (i = 0; i < 31; i++)
|
---|
802 | felem_square_reduce(ftmp, ftmp); /* 2^285 + ... + 2^31 */
|
---|
803 | felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^285 + ... + 2^31 + 2^29 + ... + 2^0 */
|
---|
804 |
|
---|
805 | for (i = 0; i < 2; i++)
|
---|
806 | felem_square_reduce(ftmp, ftmp); /* 2^287 + ... + 2^33 + 2^31 + ... + 2^2 */
|
---|
807 | felem_mul_reduce(ftmp, ftmp2, ftmp); /* 2^287 + ... + 2^33 + 2^31 + ... + 2^0 */
|
---|
808 |
|
---|
809 | for (i = 0; i < 94; i++)
|
---|
810 | felem_square_reduce(ftmp, ftmp); /* 2^381 + ... + 2^127 + 2^125 + ... + 2^94 */
|
---|
811 | felem_mul_reduce(ftmp, ftmp6, ftmp); /* 2^381 + ... + 2^127 + 2^125 + ... + 2^94 + 2^29 + ... + 2^0 */
|
---|
812 |
|
---|
813 | for (i = 0; i < 2; i++)
|
---|
814 | felem_square_reduce(ftmp, ftmp); /* 2^383 + ... + 2^129 + 2^127 + ... + 2^96 + 2^31 + ... + 2^2 */
|
---|
815 | felem_mul_reduce(ftmp, in, ftmp); /* 2^383 + ... + 2^129 + 2^127 + ... + 2^96 + 2^31 + ... + 2^2 + 2^0 */
|
---|
816 |
|
---|
817 | memcpy(out, ftmp, sizeof(felem));
|
---|
818 | }
|
---|
819 |
|
---|
820 | /*
|
---|
821 | * Zero-check: returns a limb with all bits set if |in| == 0 (mod p)
|
---|
822 | * and 0 otherwise. We know that field elements are reduced to
|
---|
823 | * 0 < in < 2p, so we only need to check two cases:
|
---|
824 | * 0 and 2^384 - 2^128 - 2^96 + 2^32 - 1
|
---|
825 | * in[k] < 2^56, k < 6
|
---|
826 | * in[6] <= 2^48
|
---|
827 | */
|
---|
828 | static limb felem_is_zero(const felem in)
|
---|
829 | {
|
---|
830 | limb zero, p384;
|
---|
831 |
|
---|
832 | zero = in[0] | in[1] | in[2] | in[3] | in[4] | in[5] | in[6];
|
---|
833 | zero = ((int64_t) (zero) - 1) >> 63;
|
---|
834 | p384 = (in[0] ^ 0x000000ffffffff) | (in[1] ^ 0xffff0000000000)
|
---|
835 | | (in[2] ^ 0xfffffffffeffff) | (in[3] ^ 0xffffffffffffff)
|
---|
836 | | (in[4] ^ 0xffffffffffffff) | (in[5] ^ 0xffffffffffffff)
|
---|
837 | | (in[6] ^ 0xffffffffffff);
|
---|
838 | p384 = ((int64_t) (p384) - 1) >> 63;
|
---|
839 |
|
---|
840 | return (zero | p384);
|
---|
841 | }
|
---|
842 |
|
---|
843 | static int felem_is_zero_int(const void *in)
|
---|
844 | {
|
---|
845 | return (int)(felem_is_zero(in) & ((limb) 1));
|
---|
846 | }
|
---|
847 |
|
---|
848 | /*-
|
---|
849 | * felem_contract converts |in| to its unique, minimal representation.
|
---|
850 | * Assume we've removed all redundant bits.
|
---|
851 | * On entry:
|
---|
852 | * in[k] < 2^56, k < 6
|
---|
853 | * in[6] <= 2^48
|
---|
854 | */
|
---|
855 | static void felem_contract(felem out, const felem in)
|
---|
856 | {
|
---|
857 | static const int64_t two56 = ((limb) 1) << 56;
|
---|
858 |
|
---|
859 | /*
|
---|
860 | * We know for a fact that 0 <= |in| < 2*p, for p = 2^384 - 2^128 - 2^96 + 2^32 - 1
|
---|
861 | * Perform two successive, idempotent subtractions to reduce if |in| >= p.
|
---|
862 | */
|
---|
863 |
|
---|
864 | int64_t tmp[NLIMBS], cond[5], a;
|
---|
865 | unsigned int i;
|
---|
866 |
|
---|
867 | memcpy(tmp, in, sizeof(felem));
|
---|
868 |
|
---|
869 | /* Case 1: a = 1 iff |in| >= 2^384 */
|
---|
870 | a = (in[6] >> 48);
|
---|
871 | tmp[0] += a;
|
---|
872 | tmp[0] -= a << 32;
|
---|
873 | tmp[1] += a << 40;
|
---|
874 | tmp[2] += a << 16;
|
---|
875 | tmp[6] &= 0x0000ffffffffffff;
|
---|
876 |
|
---|
877 | /*
|
---|
878 | * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
|
---|
879 | * non-zero, so we only need one step
|
---|
880 | */
|
---|
881 |
|
---|
882 | a = tmp[0] >> 63;
|
---|
883 | tmp[0] += a & two56;
|
---|
884 | tmp[1] -= a & 1;
|
---|
885 |
|
---|
886 | /* Carry 1 -> 2 -> 3 -> 4 -> 5 -> 6 */
|
---|
887 | tmp[2] += tmp[1] >> 56;
|
---|
888 | tmp[1] &= 0x00ffffffffffffff;
|
---|
889 |
|
---|
890 | tmp[3] += tmp[2] >> 56;
|
---|
891 | tmp[2] &= 0x00ffffffffffffff;
|
---|
892 |
|
---|
893 | tmp[4] += tmp[3] >> 56;
|
---|
894 | tmp[3] &= 0x00ffffffffffffff;
|
---|
895 |
|
---|
896 | tmp[5] += tmp[4] >> 56;
|
---|
897 | tmp[4] &= 0x00ffffffffffffff;
|
---|
898 |
|
---|
899 | tmp[6] += tmp[5] >> 56; /* tmp[6] < 2^48 */
|
---|
900 | tmp[5] &= 0x00ffffffffffffff;
|
---|
901 |
|
---|
902 | /*
|
---|
903 | * Case 2: a = all ones if p <= |in| < 2^384, 0 otherwise
|
---|
904 | */
|
---|
905 |
|
---|
906 | /* 0 iff (2^129..2^383) are all one */
|
---|
907 | cond[0] = ((tmp[6] | 0xff000000000000) & tmp[5] & tmp[4] & tmp[3] & (tmp[2] | 0x0000000001ffff)) + 1;
|
---|
908 | /* 0 iff 2^128 bit is one */
|
---|
909 | cond[1] = (tmp[2] | ~0x00000000010000) + 1;
|
---|
910 | /* 0 iff (2^96..2^127) bits are all one */
|
---|
911 | cond[2] = ((tmp[2] | 0xffffffffff0000) & (tmp[1] | 0x0000ffffffffff)) + 1;
|
---|
912 | /* 0 iff (2^32..2^95) bits are all zero */
|
---|
913 | cond[3] = (tmp[1] & ~0xffff0000000000) | (tmp[0] & ~((int64_t) 0x000000ffffffff));
|
---|
914 | /* 0 iff (2^0..2^31) bits are all one */
|
---|
915 | cond[4] = (tmp[0] | 0xffffff00000000) + 1;
|
---|
916 |
|
---|
917 | /*
|
---|
918 | * In effect, invert our conditions, so that 0 values become all 1's,
|
---|
919 | * any non-zero value in the low-order 56 bits becomes all 0's
|
---|
920 | */
|
---|
921 | for (i = 0; i < 5; i++)
|
---|
922 | cond[i] = ((cond[i] & 0x00ffffffffffffff) - 1) >> 63;
|
---|
923 |
|
---|
924 | /*
|
---|
925 | * The condition for determining whether in is greater than our
|
---|
926 | * prime is given by the following condition.
|
---|
927 | */
|
---|
928 |
|
---|
929 | /* First subtract 2^384 - 2^129 cheaply */
|
---|
930 | a = cond[0] & (cond[1] | (cond[2] & (~cond[3] | cond[4])));
|
---|
931 | tmp[6] &= ~a;
|
---|
932 | tmp[5] &= ~a;
|
---|
933 | tmp[4] &= ~a;
|
---|
934 | tmp[3] &= ~a;
|
---|
935 | tmp[2] &= ~a | 0x0000000001ffff;
|
---|
936 |
|
---|
937 | /*
|
---|
938 | * Subtract 2^128 - 2^96 by
|
---|
939 | * means of disjoint cases.
|
---|
940 | */
|
---|
941 |
|
---|
942 | /* subtract 2^128 if that bit is present, and add 2^96 */
|
---|
943 | a = cond[0] & cond[1];
|
---|
944 | tmp[2] &= ~a | 0xfffffffffeffff;
|
---|
945 | tmp[1] += a & ((int64_t) 1 << 40);
|
---|
946 |
|
---|
947 | /* otherwise, clear bits 2^127 .. 2^96 */
|
---|
948 | a = cond[0] & ~cond[1] & (cond[2] & (~cond[3] | cond[4]));
|
---|
949 | tmp[2] &= ~a | 0xffffffffff0000;
|
---|
950 | tmp[1] &= ~a | 0x0000ffffffffff;
|
---|
951 |
|
---|
952 | /* finally, subtract the last 2^32 - 1 */
|
---|
953 | a = cond[0] & (cond[1] | (cond[2] & (~cond[3] | cond[4])));
|
---|
954 | tmp[0] += a & (-((int64_t) 1 << 32) + 1);
|
---|
955 |
|
---|
956 | /*
|
---|
957 | * eliminate negative coefficients: if tmp[0] is negative, tmp[1] must be
|
---|
958 | * non-zero, so we only need one step
|
---|
959 | */
|
---|
960 | a = tmp[0] >> 63;
|
---|
961 | tmp[0] += a & two56;
|
---|
962 | tmp[1] -= a & 1;
|
---|
963 |
|
---|
964 | /* Carry 1 -> 2 -> 3 -> 4 -> 5 -> 6 */
|
---|
965 | tmp[2] += tmp[1] >> 56;
|
---|
966 | tmp[1] &= 0x00ffffffffffffff;
|
---|
967 |
|
---|
968 | tmp[3] += tmp[2] >> 56;
|
---|
969 | tmp[2] &= 0x00ffffffffffffff;
|
---|
970 |
|
---|
971 | tmp[4] += tmp[3] >> 56;
|
---|
972 | tmp[3] &= 0x00ffffffffffffff;
|
---|
973 |
|
---|
974 | tmp[5] += tmp[4] >> 56;
|
---|
975 | tmp[4] &= 0x00ffffffffffffff;
|
---|
976 |
|
---|
977 | tmp[6] += tmp[5] >> 56;
|
---|
978 | tmp[5] &= 0x00ffffffffffffff;
|
---|
979 |
|
---|
980 | memcpy(out, tmp, sizeof(felem));
|
---|
981 | }
|
---|
982 |
|
---|
983 | /*-
|
---|
984 | * Group operations
|
---|
985 | * ----------------
|
---|
986 | *
|
---|
987 | * Building on top of the field operations we have the operations on the
|
---|
988 | * elliptic curve group itself. Points on the curve are represented in Jacobian
|
---|
989 | * coordinates
|
---|
990 | */
|
---|
991 |
|
---|
992 | /*-
|
---|
993 | * point_double calculates 2*(x_in, y_in, z_in)
|
---|
994 | *
|
---|
995 | * The method is taken from:
|
---|
996 | * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
|
---|
997 | *
|
---|
998 | * Outputs can equal corresponding inputs, i.e., x_out == x_in is allowed.
|
---|
999 | * while x_out == y_in is not (maybe this works, but it's not tested).
|
---|
1000 | */
|
---|
1001 | static void
|
---|
1002 | point_double(felem x_out, felem y_out, felem z_out,
|
---|
1003 | const felem x_in, const felem y_in, const felem z_in)
|
---|
1004 | {
|
---|
1005 | widefelem tmp, tmp2;
|
---|
1006 | felem delta, gamma, beta, alpha, ftmp, ftmp2;
|
---|
1007 |
|
---|
1008 | felem_assign(ftmp, x_in);
|
---|
1009 | felem_assign(ftmp2, x_in);
|
---|
1010 |
|
---|
1011 | /* delta = z^2 */
|
---|
1012 | felem_square_reduce(delta, z_in); /* delta[i] < 2^56 */
|
---|
1013 |
|
---|
1014 | /* gamma = y^2 */
|
---|
1015 | felem_square_reduce(gamma, y_in); /* gamma[i] < 2^56 */
|
---|
1016 |
|
---|
1017 | /* beta = x*gamma */
|
---|
1018 | felem_mul_reduce(beta, x_in, gamma); /* beta[i] < 2^56 */
|
---|
1019 |
|
---|
1020 | /* alpha = 3*(x-delta)*(x+delta) */
|
---|
1021 | felem_diff64(ftmp, delta); /* ftmp[i] < 2^60 + 2^58 + 2^44 */
|
---|
1022 | felem_sum64(ftmp2, delta); /* ftmp2[i] < 2^59 */
|
---|
1023 | felem_scalar64(ftmp2, 3); /* ftmp2[i] < 2^61 */
|
---|
1024 | felem_mul_reduce(alpha, ftmp, ftmp2); /* alpha[i] < 2^56 */
|
---|
1025 |
|
---|
1026 | /* x' = alpha^2 - 8*beta */
|
---|
1027 | felem_square(tmp, alpha); /* tmp[i] < 2^115 */
|
---|
1028 | felem_assign(ftmp, beta); /* ftmp[i] < 2^56 */
|
---|
1029 | felem_scalar64(ftmp, 8); /* ftmp[i] < 2^59 */
|
---|
1030 | felem_diff_128_64(tmp, ftmp); /* tmp[i] < 2^115 + 2^64 + 2^48 */
|
---|
1031 | felem_reduce(x_out, tmp); /* x_out[i] < 2^56 */
|
---|
1032 |
|
---|
1033 | /* z' = (y + z)^2 - gamma - delta */
|
---|
1034 | felem_sum64(delta, gamma); /* delta[i] < 2^57 */
|
---|
1035 | felem_assign(ftmp, y_in); /* ftmp[i] < 2^56 */
|
---|
1036 | felem_sum64(ftmp, z_in); /* ftmp[i] < 2^56 */
|
---|
1037 | felem_square(tmp, ftmp); /* tmp[i] < 2^115 */
|
---|
1038 | felem_diff_128_64(tmp, delta); /* tmp[i] < 2^115 + 2^64 + 2^48 */
|
---|
1039 | felem_reduce(z_out, tmp); /* z_out[i] < 2^56 */
|
---|
1040 |
|
---|
1041 | /* y' = alpha*(4*beta - x') - 8*gamma^2 */
|
---|
1042 | felem_scalar64(beta, 4); /* beta[i] < 2^58 */
|
---|
1043 | felem_diff64(beta, x_out); /* beta[i] < 2^60 + 2^58 + 2^44 */
|
---|
1044 | felem_mul(tmp, alpha, beta); /* tmp[i] < 2^119 */
|
---|
1045 | felem_square(tmp2, gamma); /* tmp2[i] < 2^115 */
|
---|
1046 | felem_scalar128(tmp2, 8); /* tmp2[i] < 2^118 */
|
---|
1047 | felem_diff128(tmp, tmp2); /* tmp[i] < 2^127 + 2^119 + 2^111 */
|
---|
1048 | felem_reduce(y_out, tmp); /* tmp[i] < 2^56 */
|
---|
1049 | }
|
---|
1050 |
|
---|
1051 | /* copy_conditional copies in to out iff mask is all ones. */
|
---|
1052 | static void copy_conditional(felem out, const felem in, limb mask)
|
---|
1053 | {
|
---|
1054 | unsigned int i;
|
---|
1055 |
|
---|
1056 | for (i = 0; i < NLIMBS; i++)
|
---|
1057 | out[i] ^= mask & (in[i] ^ out[i]);
|
---|
1058 | }
|
---|
1059 |
|
---|
1060 | /*-
|
---|
1061 | * point_add calculates (x1, y1, z1) + (x2, y2, z2)
|
---|
1062 | *
|
---|
1063 | * The method is taken from
|
---|
1064 | * http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl,
|
---|
1065 | * adapted for mixed addition (z2 = 1, or z2 = 0 for the point at infinity).
|
---|
1066 | *
|
---|
1067 | * This function includes a branch for checking whether the two input points
|
---|
1068 | * are equal (while not equal to the point at infinity). See comment below
|
---|
1069 | * on constant-time.
|
---|
1070 | */
|
---|
1071 | static void point_add(felem x3, felem y3, felem z3,
|
---|
1072 | const felem x1, const felem y1, const felem z1,
|
---|
1073 | const int mixed, const felem x2, const felem y2,
|
---|
1074 | const felem z2)
|
---|
1075 | {
|
---|
1076 | felem ftmp, ftmp2, ftmp3, ftmp4, ftmp5, ftmp6, x_out, y_out, z_out;
|
---|
1077 | widefelem tmp, tmp2;
|
---|
1078 | limb x_equal, y_equal, z1_is_zero, z2_is_zero;
|
---|
1079 | limb points_equal;
|
---|
1080 |
|
---|
1081 | z1_is_zero = felem_is_zero(z1);
|
---|
1082 | z2_is_zero = felem_is_zero(z2);
|
---|
1083 |
|
---|
1084 | /* ftmp = z1z1 = z1**2 */
|
---|
1085 | felem_square_reduce(ftmp, z1); /* ftmp[i] < 2^56 */
|
---|
1086 |
|
---|
1087 | if (!mixed) {
|
---|
1088 | /* ftmp2 = z2z2 = z2**2 */
|
---|
1089 | felem_square_reduce(ftmp2, z2); /* ftmp2[i] < 2^56 */
|
---|
1090 |
|
---|
1091 | /* u1 = ftmp3 = x1*z2z2 */
|
---|
1092 | felem_mul_reduce(ftmp3, x1, ftmp2); /* ftmp3[i] < 2^56 */
|
---|
1093 |
|
---|
1094 | /* ftmp5 = z1 + z2 */
|
---|
1095 | felem_assign(ftmp5, z1); /* ftmp5[i] < 2^56 */
|
---|
1096 | felem_sum64(ftmp5, z2); /* ftmp5[i] < 2^57 */
|
---|
1097 |
|
---|
1098 | /* ftmp5 = (z1 + z2)**2 - z1z1 - z2z2 = 2*z1z2 */
|
---|
1099 | felem_square(tmp, ftmp5); /* tmp[i] < 2^117 */
|
---|
1100 | felem_diff_128_64(tmp, ftmp); /* tmp[i] < 2^117 + 2^64 + 2^48 */
|
---|
1101 | felem_diff_128_64(tmp, ftmp2); /* tmp[i] < 2^117 + 2^65 + 2^49 */
|
---|
1102 | felem_reduce(ftmp5, tmp); /* ftmp5[i] < 2^56 */
|
---|
1103 |
|
---|
1104 | /* ftmp2 = z2 * z2z2 */
|
---|
1105 | felem_mul_reduce(ftmp2, ftmp2, z2); /* ftmp2[i] < 2^56 */
|
---|
1106 |
|
---|
1107 | /* s1 = ftmp6 = y1 * z2**3 */
|
---|
1108 | felem_mul_reduce(ftmp6, y1, ftmp2); /* ftmp6[i] < 2^56 */
|
---|
1109 | } else {
|
---|
1110 | /*
|
---|
1111 | * We'll assume z2 = 1 (special case z2 = 0 is handled later)
|
---|
1112 | */
|
---|
1113 |
|
---|
1114 | /* u1 = ftmp3 = x1*z2z2 */
|
---|
1115 | felem_assign(ftmp3, x1); /* ftmp3[i] < 2^56 */
|
---|
1116 |
|
---|
1117 | /* ftmp5 = 2*z1z2 */
|
---|
1118 | felem_scalar(ftmp5, z1, 2); /* ftmp5[i] < 2^57 */
|
---|
1119 |
|
---|
1120 | /* s1 = ftmp6 = y1 * z2**3 */
|
---|
1121 | felem_assign(ftmp6, y1); /* ftmp6[i] < 2^56 */
|
---|
1122 | }
|
---|
1123 | /* ftmp3[i] < 2^56, ftmp5[i] < 2^57, ftmp6[i] < 2^56 */
|
---|
1124 |
|
---|
1125 | /* u2 = x2*z1z1 */
|
---|
1126 | felem_mul(tmp, x2, ftmp); /* tmp[i] < 2^115 */
|
---|
1127 |
|
---|
1128 | /* h = ftmp4 = u2 - u1 */
|
---|
1129 | felem_diff_128_64(tmp, ftmp3); /* tmp[i] < 2^115 + 2^64 + 2^48 */
|
---|
1130 | felem_reduce(ftmp4, tmp); /* ftmp[4] < 2^56 */
|
---|
1131 |
|
---|
1132 | x_equal = felem_is_zero(ftmp4);
|
---|
1133 |
|
---|
1134 | /* z_out = ftmp5 * h */
|
---|
1135 | felem_mul_reduce(z_out, ftmp5, ftmp4); /* z_out[i] < 2^56 */
|
---|
1136 |
|
---|
1137 | /* ftmp = z1 * z1z1 */
|
---|
1138 | felem_mul_reduce(ftmp, ftmp, z1); /* ftmp[i] < 2^56 */
|
---|
1139 |
|
---|
1140 | /* s2 = tmp = y2 * z1**3 */
|
---|
1141 | felem_mul(tmp, y2, ftmp); /* tmp[i] < 2^115 */
|
---|
1142 |
|
---|
1143 | /* r = ftmp5 = (s2 - s1)*2 */
|
---|
1144 | felem_diff_128_64(tmp, ftmp6); /* tmp[i] < 2^115 + 2^64 + 2^48 */
|
---|
1145 | felem_reduce(ftmp5, tmp); /* ftmp5[i] < 2^56 */
|
---|
1146 | y_equal = felem_is_zero(ftmp5);
|
---|
1147 | felem_scalar64(ftmp5, 2); /* ftmp5[i] < 2^57 */
|
---|
1148 |
|
---|
1149 | /*
|
---|
1150 | * The formulae are incorrect if the points are equal, in affine coordinates
|
---|
1151 | * (X_1, Y_1) == (X_2, Y_2), so we check for this and do doubling if this
|
---|
1152 | * happens.
|
---|
1153 | *
|
---|
1154 | * We use bitwise operations to avoid potential side-channels introduced by
|
---|
1155 | * the short-circuiting behaviour of boolean operators.
|
---|
1156 | *
|
---|
1157 | * The special case of either point being the point at infinity (z1 and/or
|
---|
1158 | * z2 are zero), is handled separately later on in this function, so we
|
---|
1159 | * avoid jumping to point_double here in those special cases.
|
---|
1160 | *
|
---|
1161 | * Notice the comment below on the implications of this branching for timing
|
---|
1162 | * leaks and why it is considered practically irrelevant.
|
---|
1163 | */
|
---|
1164 | points_equal = (x_equal & y_equal & (~z1_is_zero) & (~z2_is_zero));
|
---|
1165 |
|
---|
1166 | if (points_equal) {
|
---|
1167 | /*
|
---|
1168 | * This is obviously not constant-time but it will almost-never happen
|
---|
1169 | * for ECDH / ECDSA.
|
---|
1170 | */
|
---|
1171 | point_double(x3, y3, z3, x1, y1, z1);
|
---|
1172 | return;
|
---|
1173 | }
|
---|
1174 |
|
---|
1175 | /* I = ftmp = (2h)**2 */
|
---|
1176 | felem_assign(ftmp, ftmp4); /* ftmp[i] < 2^56 */
|
---|
1177 | felem_scalar64(ftmp, 2); /* ftmp[i] < 2^57 */
|
---|
1178 | felem_square_reduce(ftmp, ftmp); /* ftmp[i] < 2^56 */
|
---|
1179 |
|
---|
1180 | /* J = ftmp2 = h * I */
|
---|
1181 | felem_mul_reduce(ftmp2, ftmp4, ftmp); /* ftmp2[i] < 2^56 */
|
---|
1182 |
|
---|
1183 | /* V = ftmp4 = U1 * I */
|
---|
1184 | felem_mul_reduce(ftmp4, ftmp3, ftmp); /* ftmp4[i] < 2^56 */
|
---|
1185 |
|
---|
1186 | /* x_out = r**2 - J - 2V */
|
---|
1187 | felem_square(tmp, ftmp5); /* tmp[i] < 2^117 */
|
---|
1188 | felem_diff_128_64(tmp, ftmp2); /* tmp[i] < 2^117 + 2^64 + 2^48 */
|
---|
1189 | felem_assign(ftmp3, ftmp4); /* ftmp3[i] < 2^56 */
|
---|
1190 | felem_scalar64(ftmp4, 2); /* ftmp4[i] < 2^57 */
|
---|
1191 | felem_diff_128_64(tmp, ftmp4); /* tmp[i] < 2^117 + 2^65 + 2^49 */
|
---|
1192 | felem_reduce(x_out, tmp); /* x_out[i] < 2^56 */
|
---|
1193 |
|
---|
1194 | /* y_out = r(V-x_out) - 2 * s1 * J */
|
---|
1195 | felem_diff64(ftmp3, x_out); /* ftmp3[i] < 2^60 + 2^56 + 2^44 */
|
---|
1196 | felem_mul(tmp, ftmp5, ftmp3); /* tmp[i] < 2^116 */
|
---|
1197 | felem_mul(tmp2, ftmp6, ftmp2); /* tmp2[i] < 2^115 */
|
---|
1198 | felem_scalar128(tmp2, 2); /* tmp2[i] < 2^116 */
|
---|
1199 | felem_diff128(tmp, tmp2); /* tmp[i] < 2^127 + 2^116 + 2^111 */
|
---|
1200 | felem_reduce(y_out, tmp); /* y_out[i] < 2^56 */
|
---|
1201 |
|
---|
1202 | copy_conditional(x_out, x2, z1_is_zero);
|
---|
1203 | copy_conditional(x_out, x1, z2_is_zero);
|
---|
1204 | copy_conditional(y_out, y2, z1_is_zero);
|
---|
1205 | copy_conditional(y_out, y1, z2_is_zero);
|
---|
1206 | copy_conditional(z_out, z2, z1_is_zero);
|
---|
1207 | copy_conditional(z_out, z1, z2_is_zero);
|
---|
1208 | felem_assign(x3, x_out);
|
---|
1209 | felem_assign(y3, y_out);
|
---|
1210 | felem_assign(z3, z_out);
|
---|
1211 | }
|
---|
1212 |
|
---|
1213 | /*-
|
---|
1214 | * Base point pre computation
|
---|
1215 | * --------------------------
|
---|
1216 | *
|
---|
1217 | * Two different sorts of precomputed tables are used in the following code.
|
---|
1218 | * Each contain various points on the curve, where each point is three field
|
---|
1219 | * elements (x, y, z).
|
---|
1220 | *
|
---|
1221 | * For the base point table, z is usually 1 (0 for the point at infinity).
|
---|
1222 | * This table has 16 elements:
|
---|
1223 | * index | bits | point
|
---|
1224 | * ------+---------+------------------------------
|
---|
1225 | * 0 | 0 0 0 0 | 0G
|
---|
1226 | * 1 | 0 0 0 1 | 1G
|
---|
1227 | * 2 | 0 0 1 0 | 2^95G
|
---|
1228 | * 3 | 0 0 1 1 | (2^95 + 1)G
|
---|
1229 | * 4 | 0 1 0 0 | 2^190G
|
---|
1230 | * 5 | 0 1 0 1 | (2^190 + 1)G
|
---|
1231 | * 6 | 0 1 1 0 | (2^190 + 2^95)G
|
---|
1232 | * 7 | 0 1 1 1 | (2^190 + 2^95 + 1)G
|
---|
1233 | * 8 | 1 0 0 0 | 2^285G
|
---|
1234 | * 9 | 1 0 0 1 | (2^285 + 1)G
|
---|
1235 | * 10 | 1 0 1 0 | (2^285 + 2^95)G
|
---|
1236 | * 11 | 1 0 1 1 | (2^285 + 2^95 + 1)G
|
---|
1237 | * 12 | 1 1 0 0 | (2^285 + 2^190)G
|
---|
1238 | * 13 | 1 1 0 1 | (2^285 + 2^190 + 1)G
|
---|
1239 | * 14 | 1 1 1 0 | (2^285 + 2^190 + 2^95)G
|
---|
1240 | * 15 | 1 1 1 1 | (2^285 + 2^190 + 2^95 + 1)G
|
---|
1241 | *
|
---|
1242 | * The reason for this is so that we can clock bits into four different
|
---|
1243 | * locations when doing simple scalar multiplies against the base point.
|
---|
1244 | *
|
---|
1245 | * Tables for other points have table[i] = iG for i in 0 .. 16.
|
---|
1246 | */
|
---|
1247 |
|
---|
1248 | /* gmul is the table of precomputed base points */
|
---|
1249 | static const felem gmul[16][3] = {
|
---|
1250 | {{0, 0, 0, 0, 0, 0, 0},
|
---|
1251 | {0, 0, 0, 0, 0, 0, 0},
|
---|
1252 | {0, 0, 0, 0, 0, 0, 0}},
|
---|
1253 | {{0x00545e3872760ab7, 0x00f25dbf55296c3a, 0x00e082542a385502, 0x008ba79b9859f741,
|
---|
1254 | 0x0020ad746e1d3b62, 0x0005378eb1c71ef3, 0x0000aa87ca22be8b},
|
---|
1255 | {0x00431d7c90ea0e5f, 0x00b1ce1d7e819d7a, 0x0013b5f0b8c00a60, 0x00289a147ce9da31,
|
---|
1256 | 0x0092dc29f8f41dbd, 0x002c6f5d9e98bf92, 0x00003617de4a9626},
|
---|
1257 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1258 | {{0x00024711cc902a90, 0x00acb2e579ab4fe1, 0x00af818a4b4d57b1, 0x00a17c7bec49c3de,
|
---|
1259 | 0x004280482d726a8b, 0x00128dd0f0a90f3b, 0x00004387c1c3fa3c},
|
---|
1260 | {0x002ce76543cf5c3a, 0x00de6cee5ef58f0a, 0x00403e42fa561ca6, 0x00bc54d6f9cb9731,
|
---|
1261 | 0x007155f925fb4ff1, 0x004a9ce731b7b9bc, 0x00002609076bd7b2},
|
---|
1262 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1263 | {{0x00e74c9182f0251d, 0x0039bf54bb111974, 0x00b9d2f2eec511d2, 0x0036b1594eb3a6a4,
|
---|
1264 | 0x00ac3bb82d9d564b, 0x00f9313f4615a100, 0x00006716a9a91b10},
|
---|
1265 | {0x0046698116e2f15c, 0x00f34347067d3d33, 0x008de4ccfdebd002, 0x00e838c6b8e8c97b,
|
---|
1266 | 0x006faf0798def346, 0x007349794a57563c, 0x00002629e7e6ad84},
|
---|
1267 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1268 | {{0x0075300e34fd163b, 0x0092e9db4e8d0ad3, 0x00254be9f625f760, 0x00512c518c72ae68,
|
---|
1269 | 0x009bfcf162bede5a, 0x00bf9341566ce311, 0x0000cd6175bd41cf},
|
---|
1270 | {0x007dfe52af4ac70f, 0x0002159d2d5c4880, 0x00b504d16f0af8d0, 0x0014585e11f5e64c,
|
---|
1271 | 0x0089c6388e030967, 0x00ffb270cbfa5f71, 0x00009a15d92c3947},
|
---|
1272 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1273 | {{0x0033fc1278dc4fe5, 0x00d53088c2caa043, 0x0085558827e2db66, 0x00c192bef387b736,
|
---|
1274 | 0x00df6405a2225f2c, 0x0075205aa90fd91a, 0x0000137e3f12349d},
|
---|
1275 | {0x00ce5b115efcb07e, 0x00abc3308410deeb, 0x005dc6fc1de39904, 0x00907c1c496f36b4,
|
---|
1276 | 0x0008e6ad3926cbe1, 0x00110747b787928c, 0x0000021b9162eb7e},
|
---|
1277 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1278 | {{0x008180042cfa26e1, 0x007b826a96254967, 0x0082473694d6b194, 0x007bd6880a45b589,
|
---|
1279 | 0x00c0a5097072d1a3, 0x0019186555e18b4e, 0x000020278190e5ca},
|
---|
1280 | {0x00b4bef17de61ac0, 0x009535e3c38ed348, 0x002d4aa8e468ceab, 0x00ef40b431036ad3,
|
---|
1281 | 0x00defd52f4542857, 0x0086edbf98234266, 0x00002025b3a7814d},
|
---|
1282 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1283 | {{0x00b238aa97b886be, 0x00ef3192d6dd3a32, 0x0079f9e01fd62df8, 0x00742e890daba6c5,
|
---|
1284 | 0x008e5289144408ce, 0x0073bbcc8e0171a5, 0x0000c4fd329d3b52},
|
---|
1285 | {0x00c6f64a15ee23e7, 0x00dcfb7b171cad8b, 0x00039f6cbd805867, 0x00de024e428d4562,
|
---|
1286 | 0x00be6a594d7c64c5, 0x0078467b70dbcd64, 0x0000251f2ed7079b},
|
---|
1287 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1288 | {{0x000e5cc25fc4b872, 0x005ebf10d31ef4e1, 0x0061e0ebd11e8256, 0x0076e026096f5a27,
|
---|
1289 | 0x0013e6fc44662e9a, 0x0042b00289d3597e, 0x000024f089170d88},
|
---|
1290 | {0x001604d7e0effbe6, 0x0048d77cba64ec2c, 0x008166b16da19e36, 0x006b0d1a0f28c088,
|
---|
1291 | 0x000259fcd47754fd, 0x00cc643e4d725f9a, 0x00007b10f3c79c14},
|
---|
1292 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1293 | {{0x00430155e3b908af, 0x00b801e4fec25226, 0x00b0d4bcfe806d26, 0x009fc4014eb13d37,
|
---|
1294 | 0x0066c94e44ec07e8, 0x00d16adc03874ba2, 0x000030c917a0d2a7},
|
---|
1295 | {0x00edac9e21eb891c, 0x00ef0fb768102eff, 0x00c088cef272a5f3, 0x00cbf782134e2964,
|
---|
1296 | 0x0001044a7ba9a0e3, 0x00e363f5b194cf3c, 0x00009ce85249e372},
|
---|
1297 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1298 | {{0x001dd492dda5a7eb, 0x008fd577be539fd1, 0x002ff4b25a5fc3f1, 0x0074a8a1b64df72f,
|
---|
1299 | 0x002ba3d8c204a76c, 0x009d5cff95c8235a, 0x0000e014b9406e0f},
|
---|
1300 | {0x008c2e4dbfc98aba, 0x00f30bb89f1a1436, 0x00b46f7aea3e259c, 0x009224454ac02f54,
|
---|
1301 | 0x00906401f5645fa2, 0x003a1d1940eabc77, 0x00007c9351d680e6},
|
---|
1302 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1303 | {{0x005a35d872ef967c, 0x0049f1b7884e1987, 0x0059d46d7e31f552, 0x00ceb4869d2d0fb6,
|
---|
1304 | 0x00e8e89eee56802a, 0x0049d806a774aaf2, 0x0000147e2af0ae24},
|
---|
1305 | {0x005fd1bd852c6e5e, 0x00b674b7b3de6885, 0x003b9ea5eb9b6c08, 0x005c9f03babf3ef7,
|
---|
1306 | 0x00605337fecab3c7, 0x009a3f85b11bbcc8, 0x0000455470f330ec},
|
---|
1307 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1308 | {{0x002197ff4d55498d, 0x00383e8916c2d8af, 0x00eb203f34d1c6d2, 0x0080367cbd11b542,
|
---|
1309 | 0x00769b3be864e4f5, 0x0081a8458521c7bb, 0x0000c531b34d3539},
|
---|
1310 | {0x00e2a3d775fa2e13, 0x00534fc379573844, 0x00ff237d2a8db54a, 0x00d301b2335a8882,
|
---|
1311 | 0x000f75ea96103a80, 0x0018fecb3cdd96fa, 0x0000304bf61e94eb},
|
---|
1312 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1313 | {{0x00b2afc332a73dbd, 0x0029a0d5bb007bc5, 0x002d628eb210f577, 0x009f59a36dd05f50,
|
---|
1314 | 0x006d339de4eca613, 0x00c75a71addc86bc, 0x000060384c5ea93c},
|
---|
1315 | {0x00aa9641c32a30b4, 0x00cc73ae8cce565d, 0x00ec911a4df07f61, 0x00aa4b762ea4b264,
|
---|
1316 | 0x0096d395bb393629, 0x004efacfb7632fe0, 0x00006f252f46fa3f},
|
---|
1317 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1318 | {{0x00567eec597c7af6, 0x0059ba6795204413, 0x00816d4e6f01196f, 0x004ae6b3eb57951d,
|
---|
1319 | 0x00420f5abdda2108, 0x003401d1f57ca9d9, 0x0000cf5837b0b67a},
|
---|
1320 | {0x00eaa64b8aeeabf9, 0x00246ddf16bcb4de, 0x000e7e3c3aecd751, 0x0008449f04fed72e,
|
---|
1321 | 0x00307b67ccf09183, 0x0017108c3556b7b1, 0x0000229b2483b3bf},
|
---|
1322 | {1, 0, 0, 0, 0, 0, 0}},
|
---|
1323 | {{0x00e7c491a7bb78a1, 0x00eafddd1d3049ab, 0x00352c05e2bc7c98, 0x003d6880c165fa5c,
|
---|
1324 | 0x00b6ac61cc11c97d, 0x00beeb54fcf90ce5, 0x0000dc1f0b455edc},
|
---|
1325 | {0x002db2e7aee34d60, 0x0073b5f415a2d8c0, 0x00dd84e4193e9a0c, 0x00d02d873467c572,
|
---|
1326 | 0x0018baaeda60aee5, 0x0013fb11d697c61e, 0x000083aafcc3a973},
|
---|
1327 | {1, 0, 0, 0, 0, 0, 0}}
|
---|
1328 | };
|
---|
1329 |
|
---|
1330 | /*
|
---|
1331 | * select_point selects the |idx|th point from a precomputation table and
|
---|
1332 | * copies it to out.
|
---|
1333 | *
|
---|
1334 | * pre_comp below is of the size provided in |size|.
|
---|
1335 | */
|
---|
1336 | static void select_point(const limb idx, unsigned int size,
|
---|
1337 | const felem pre_comp[][3], felem out[3])
|
---|
1338 | {
|
---|
1339 | unsigned int i, j;
|
---|
1340 | limb *outlimbs = &out[0][0];
|
---|
1341 |
|
---|
1342 | memset(out, 0, sizeof(*out) * 3);
|
---|
1343 |
|
---|
1344 | for (i = 0; i < size; i++) {
|
---|
1345 | const limb *inlimbs = &pre_comp[i][0][0];
|
---|
1346 | limb mask = i ^ idx;
|
---|
1347 |
|
---|
1348 | mask |= mask >> 4;
|
---|
1349 | mask |= mask >> 2;
|
---|
1350 | mask |= mask >> 1;
|
---|
1351 | mask &= 1;
|
---|
1352 | mask--;
|
---|
1353 | for (j = 0; j < NLIMBS * 3; j++)
|
---|
1354 | outlimbs[j] |= inlimbs[j] & mask;
|
---|
1355 | }
|
---|
1356 | }
|
---|
1357 |
|
---|
1358 | /* get_bit returns the |i|th bit in |in| */
|
---|
1359 | static char get_bit(const felem_bytearray in, int i)
|
---|
1360 | {
|
---|
1361 | if (i < 0 || i >= 384)
|
---|
1362 | return 0;
|
---|
1363 | return (in[i >> 3] >> (i & 7)) & 1;
|
---|
1364 | }
|
---|
1365 |
|
---|
1366 | /*
|
---|
1367 | * Interleaved point multiplication using precomputed point multiples: The
|
---|
1368 | * small point multiples 0*P, 1*P, ..., 16*P are in pre_comp[], the scalars
|
---|
1369 | * in scalars[]. If g_scalar is non-NULL, we also add this multiple of the
|
---|
1370 | * generator, using certain (large) precomputed multiples in g_pre_comp.
|
---|
1371 | * Output point (X, Y, Z) is stored in x_out, y_out, z_out
|
---|
1372 | */
|
---|
1373 | static void batch_mul(felem x_out, felem y_out, felem z_out,
|
---|
1374 | const felem_bytearray scalars[],
|
---|
1375 | const unsigned int num_points, const u8 *g_scalar,
|
---|
1376 | const int mixed, const felem pre_comp[][17][3],
|
---|
1377 | const felem g_pre_comp[16][3])
|
---|
1378 | {
|
---|
1379 | int i, skip;
|
---|
1380 | unsigned int num, gen_mul = (g_scalar != NULL);
|
---|
1381 | felem nq[3], tmp[4];
|
---|
1382 | limb bits;
|
---|
1383 | u8 sign, digit;
|
---|
1384 |
|
---|
1385 | /* set nq to the point at infinity */
|
---|
1386 | memset(nq, 0, sizeof(nq));
|
---|
1387 |
|
---|
1388 | /*
|
---|
1389 | * Loop over all scalars msb-to-lsb, interleaving additions of multiples
|
---|
1390 | * of the generator (last quarter of rounds) and additions of other
|
---|
1391 | * points multiples (every 5th round).
|
---|
1392 | */
|
---|
1393 | skip = 1; /* save two point operations in the first
|
---|
1394 | * round */
|
---|
1395 | for (i = (num_points ? 380 : 98); i >= 0; --i) {
|
---|
1396 | /* double */
|
---|
1397 | if (!skip)
|
---|
1398 | point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]);
|
---|
1399 |
|
---|
1400 | /* add multiples of the generator */
|
---|
1401 | if (gen_mul && (i <= 98)) {
|
---|
1402 | bits = get_bit(g_scalar, i + 285) << 3;
|
---|
1403 | if (i < 95) {
|
---|
1404 | bits |= get_bit(g_scalar, i + 190) << 2;
|
---|
1405 | bits |= get_bit(g_scalar, i + 95) << 1;
|
---|
1406 | bits |= get_bit(g_scalar, i);
|
---|
1407 | }
|
---|
1408 | /* select the point to add, in constant time */
|
---|
1409 | select_point(bits, 16, g_pre_comp, tmp);
|
---|
1410 | if (!skip) {
|
---|
1411 | /* The 1 argument below is for "mixed" */
|
---|
1412 | point_add(nq[0], nq[1], nq[2],
|
---|
1413 | nq[0], nq[1], nq[2], 1,
|
---|
1414 | tmp[0], tmp[1], tmp[2]);
|
---|
1415 | } else {
|
---|
1416 | memcpy(nq, tmp, 3 * sizeof(felem));
|
---|
1417 | skip = 0;
|
---|
1418 | }
|
---|
1419 | }
|
---|
1420 |
|
---|
1421 | /* do other additions every 5 doublings */
|
---|
1422 | if (num_points && (i % 5 == 0)) {
|
---|
1423 | /* loop over all scalars */
|
---|
1424 | for (num = 0; num < num_points; ++num) {
|
---|
1425 | bits = get_bit(scalars[num], i + 4) << 5;
|
---|
1426 | bits |= get_bit(scalars[num], i + 3) << 4;
|
---|
1427 | bits |= get_bit(scalars[num], i + 2) << 3;
|
---|
1428 | bits |= get_bit(scalars[num], i + 1) << 2;
|
---|
1429 | bits |= get_bit(scalars[num], i) << 1;
|
---|
1430 | bits |= get_bit(scalars[num], i - 1);
|
---|
1431 | ossl_ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
|
---|
1432 |
|
---|
1433 | /*
|
---|
1434 | * select the point to add or subtract, in constant time
|
---|
1435 | */
|
---|
1436 | select_point(digit, 17, pre_comp[num], tmp);
|
---|
1437 | felem_neg(tmp[3], tmp[1]); /* (X, -Y, Z) is the negative
|
---|
1438 | * point */
|
---|
1439 | copy_conditional(tmp[1], tmp[3], (-(limb) sign));
|
---|
1440 |
|
---|
1441 | if (!skip) {
|
---|
1442 | point_add(nq[0], nq[1], nq[2],
|
---|
1443 | nq[0], nq[1], nq[2], mixed,
|
---|
1444 | tmp[0], tmp[1], tmp[2]);
|
---|
1445 | } else {
|
---|
1446 | memcpy(nq, tmp, 3 * sizeof(felem));
|
---|
1447 | skip = 0;
|
---|
1448 | }
|
---|
1449 | }
|
---|
1450 | }
|
---|
1451 | }
|
---|
1452 | felem_assign(x_out, nq[0]);
|
---|
1453 | felem_assign(y_out, nq[1]);
|
---|
1454 | felem_assign(z_out, nq[2]);
|
---|
1455 | }
|
---|
1456 |
|
---|
1457 | /* Precomputation for the group generator. */
|
---|
1458 | struct nistp384_pre_comp_st {
|
---|
1459 | felem g_pre_comp[16][3];
|
---|
1460 | CRYPTO_REF_COUNT references;
|
---|
1461 | };
|
---|
1462 |
|
---|
1463 | const EC_METHOD *ossl_ec_GFp_nistp384_method(void)
|
---|
1464 | {
|
---|
1465 | static const EC_METHOD ret = {
|
---|
1466 | EC_FLAGS_DEFAULT_OCT,
|
---|
1467 | NID_X9_62_prime_field,
|
---|
1468 | ossl_ec_GFp_nistp384_group_init,
|
---|
1469 | ossl_ec_GFp_simple_group_finish,
|
---|
1470 | ossl_ec_GFp_simple_group_clear_finish,
|
---|
1471 | ossl_ec_GFp_nist_group_copy,
|
---|
1472 | ossl_ec_GFp_nistp384_group_set_curve,
|
---|
1473 | ossl_ec_GFp_simple_group_get_curve,
|
---|
1474 | ossl_ec_GFp_simple_group_get_degree,
|
---|
1475 | ossl_ec_group_simple_order_bits,
|
---|
1476 | ossl_ec_GFp_simple_group_check_discriminant,
|
---|
1477 | ossl_ec_GFp_simple_point_init,
|
---|
1478 | ossl_ec_GFp_simple_point_finish,
|
---|
1479 | ossl_ec_GFp_simple_point_clear_finish,
|
---|
1480 | ossl_ec_GFp_simple_point_copy,
|
---|
1481 | ossl_ec_GFp_simple_point_set_to_infinity,
|
---|
1482 | ossl_ec_GFp_simple_point_set_affine_coordinates,
|
---|
1483 | ossl_ec_GFp_nistp384_point_get_affine_coordinates,
|
---|
1484 | 0, /* point_set_compressed_coordinates */
|
---|
1485 | 0, /* point2oct */
|
---|
1486 | 0, /* oct2point */
|
---|
1487 | ossl_ec_GFp_simple_add,
|
---|
1488 | ossl_ec_GFp_simple_dbl,
|
---|
1489 | ossl_ec_GFp_simple_invert,
|
---|
1490 | ossl_ec_GFp_simple_is_at_infinity,
|
---|
1491 | ossl_ec_GFp_simple_is_on_curve,
|
---|
1492 | ossl_ec_GFp_simple_cmp,
|
---|
1493 | ossl_ec_GFp_simple_make_affine,
|
---|
1494 | ossl_ec_GFp_simple_points_make_affine,
|
---|
1495 | ossl_ec_GFp_nistp384_points_mul,
|
---|
1496 | ossl_ec_GFp_nistp384_precompute_mult,
|
---|
1497 | ossl_ec_GFp_nistp384_have_precompute_mult,
|
---|
1498 | ossl_ec_GFp_nist_field_mul,
|
---|
1499 | ossl_ec_GFp_nist_field_sqr,
|
---|
1500 | 0, /* field_div */
|
---|
1501 | ossl_ec_GFp_simple_field_inv,
|
---|
1502 | 0, /* field_encode */
|
---|
1503 | 0, /* field_decode */
|
---|
1504 | 0, /* field_set_to_one */
|
---|
1505 | ossl_ec_key_simple_priv2oct,
|
---|
1506 | ossl_ec_key_simple_oct2priv,
|
---|
1507 | 0, /* set private */
|
---|
1508 | ossl_ec_key_simple_generate_key,
|
---|
1509 | ossl_ec_key_simple_check_key,
|
---|
1510 | ossl_ec_key_simple_generate_public_key,
|
---|
1511 | 0, /* keycopy */
|
---|
1512 | 0, /* keyfinish */
|
---|
1513 | ossl_ecdh_simple_compute_key,
|
---|
1514 | ossl_ecdsa_simple_sign_setup,
|
---|
1515 | ossl_ecdsa_simple_sign_sig,
|
---|
1516 | ossl_ecdsa_simple_verify_sig,
|
---|
1517 | 0, /* field_inverse_mod_ord */
|
---|
1518 | 0, /* blind_coordinates */
|
---|
1519 | 0, /* ladder_pre */
|
---|
1520 | 0, /* ladder_step */
|
---|
1521 | 0 /* ladder_post */
|
---|
1522 | };
|
---|
1523 |
|
---|
1524 | return &ret;
|
---|
1525 | }
|
---|
1526 |
|
---|
1527 | /******************************************************************************/
|
---|
1528 | /*
|
---|
1529 | * FUNCTIONS TO MANAGE PRECOMPUTATION
|
---|
1530 | */
|
---|
1531 |
|
---|
1532 | static NISTP384_PRE_COMP *nistp384_pre_comp_new(void)
|
---|
1533 | {
|
---|
1534 | NISTP384_PRE_COMP *ret = OPENSSL_zalloc(sizeof(*ret));
|
---|
1535 |
|
---|
1536 | if (ret == NULL)
|
---|
1537 | return ret;
|
---|
1538 |
|
---|
1539 | if (!CRYPTO_NEW_REF(&ret->references, 1)) {
|
---|
1540 | OPENSSL_free(ret);
|
---|
1541 | return NULL;
|
---|
1542 | }
|
---|
1543 | return ret;
|
---|
1544 | }
|
---|
1545 |
|
---|
1546 | NISTP384_PRE_COMP *ossl_ec_nistp384_pre_comp_dup(NISTP384_PRE_COMP *p)
|
---|
1547 | {
|
---|
1548 | int i;
|
---|
1549 |
|
---|
1550 | if (p != NULL)
|
---|
1551 | CRYPTO_UP_REF(&p->references, &i);
|
---|
1552 | return p;
|
---|
1553 | }
|
---|
1554 |
|
---|
1555 | void ossl_ec_nistp384_pre_comp_free(NISTP384_PRE_COMP *p)
|
---|
1556 | {
|
---|
1557 | int i;
|
---|
1558 |
|
---|
1559 | if (p == NULL)
|
---|
1560 | return;
|
---|
1561 |
|
---|
1562 | CRYPTO_DOWN_REF(&p->references, &i);
|
---|
1563 | REF_PRINT_COUNT("ossl_ec_nistp384", p);
|
---|
1564 | if (i > 0)
|
---|
1565 | return;
|
---|
1566 | REF_ASSERT_ISNT(i < 0);
|
---|
1567 |
|
---|
1568 | CRYPTO_FREE_REF(&p->references);
|
---|
1569 | OPENSSL_free(p);
|
---|
1570 | }
|
---|
1571 |
|
---|
1572 | /******************************************************************************/
|
---|
1573 | /*
|
---|
1574 | * OPENSSL EC_METHOD FUNCTIONS
|
---|
1575 | */
|
---|
1576 |
|
---|
1577 | int ossl_ec_GFp_nistp384_group_init(EC_GROUP *group)
|
---|
1578 | {
|
---|
1579 | int ret;
|
---|
1580 |
|
---|
1581 | ret = ossl_ec_GFp_simple_group_init(group);
|
---|
1582 | group->a_is_minus3 = 1;
|
---|
1583 | return ret;
|
---|
1584 | }
|
---|
1585 |
|
---|
1586 | int ossl_ec_GFp_nistp384_group_set_curve(EC_GROUP *group, const BIGNUM *p,
|
---|
1587 | const BIGNUM *a, const BIGNUM *b,
|
---|
1588 | BN_CTX *ctx)
|
---|
1589 | {
|
---|
1590 | int ret = 0;
|
---|
1591 | BIGNUM *curve_p, *curve_a, *curve_b;
|
---|
1592 | #ifndef FIPS_MODULE
|
---|
1593 | BN_CTX *new_ctx = NULL;
|
---|
1594 |
|
---|
1595 | if (ctx == NULL)
|
---|
1596 | ctx = new_ctx = BN_CTX_new();
|
---|
1597 | #endif
|
---|
1598 | if (ctx == NULL)
|
---|
1599 | return 0;
|
---|
1600 |
|
---|
1601 | BN_CTX_start(ctx);
|
---|
1602 | curve_p = BN_CTX_get(ctx);
|
---|
1603 | curve_a = BN_CTX_get(ctx);
|
---|
1604 | curve_b = BN_CTX_get(ctx);
|
---|
1605 | if (curve_b == NULL)
|
---|
1606 | goto err;
|
---|
1607 | BN_bin2bn(nistp384_curve_params[0], sizeof(felem_bytearray), curve_p);
|
---|
1608 | BN_bin2bn(nistp384_curve_params[1], sizeof(felem_bytearray), curve_a);
|
---|
1609 | BN_bin2bn(nistp384_curve_params[2], sizeof(felem_bytearray), curve_b);
|
---|
1610 | if ((BN_cmp(curve_p, p)) || (BN_cmp(curve_a, a)) || (BN_cmp(curve_b, b))) {
|
---|
1611 | ERR_raise(ERR_LIB_EC, EC_R_WRONG_CURVE_PARAMETERS);
|
---|
1612 | goto err;
|
---|
1613 | }
|
---|
1614 | group->field_mod_func = BN_nist_mod_384;
|
---|
1615 | ret = ossl_ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
|
---|
1616 | err:
|
---|
1617 | BN_CTX_end(ctx);
|
---|
1618 | #ifndef FIPS_MODULE
|
---|
1619 | BN_CTX_free(new_ctx);
|
---|
1620 | #endif
|
---|
1621 | return ret;
|
---|
1622 | }
|
---|
1623 |
|
---|
1624 | /*
|
---|
1625 | * Takes the Jacobian coordinates (X, Y, Z) of a point and returns (X', Y') =
|
---|
1626 | * (X/Z^2, Y/Z^3)
|
---|
1627 | */
|
---|
1628 | int ossl_ec_GFp_nistp384_point_get_affine_coordinates(const EC_GROUP *group,
|
---|
1629 | const EC_POINT *point,
|
---|
1630 | BIGNUM *x, BIGNUM *y,
|
---|
1631 | BN_CTX *ctx)
|
---|
1632 | {
|
---|
1633 | felem z1, z2, x_in, y_in, x_out, y_out;
|
---|
1634 | widefelem tmp;
|
---|
1635 |
|
---|
1636 | if (EC_POINT_is_at_infinity(group, point)) {
|
---|
1637 | ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY);
|
---|
1638 | return 0;
|
---|
1639 | }
|
---|
1640 | if ((!BN_to_felem(x_in, point->X)) || (!BN_to_felem(y_in, point->Y)) ||
|
---|
1641 | (!BN_to_felem(z1, point->Z)))
|
---|
1642 | return 0;
|
---|
1643 | felem_inv(z2, z1);
|
---|
1644 | felem_square(tmp, z2);
|
---|
1645 | felem_reduce(z1, tmp);
|
---|
1646 | felem_mul(tmp, x_in, z1);
|
---|
1647 | felem_reduce(x_in, tmp);
|
---|
1648 | felem_contract(x_out, x_in);
|
---|
1649 | if (x != NULL) {
|
---|
1650 | if (!felem_to_BN(x, x_out)) {
|
---|
1651 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
1652 | return 0;
|
---|
1653 | }
|
---|
1654 | }
|
---|
1655 | felem_mul(tmp, z1, z2);
|
---|
1656 | felem_reduce(z1, tmp);
|
---|
1657 | felem_mul(tmp, y_in, z1);
|
---|
1658 | felem_reduce(y_in, tmp);
|
---|
1659 | felem_contract(y_out, y_in);
|
---|
1660 | if (y != NULL) {
|
---|
1661 | if (!felem_to_BN(y, y_out)) {
|
---|
1662 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
1663 | return 0;
|
---|
1664 | }
|
---|
1665 | }
|
---|
1666 | return 1;
|
---|
1667 | }
|
---|
1668 |
|
---|
1669 | /* points below is of size |num|, and tmp_felems is of size |num+1/ */
|
---|
1670 | static void make_points_affine(size_t num, felem points[][3],
|
---|
1671 | felem tmp_felems[])
|
---|
1672 | {
|
---|
1673 | /*
|
---|
1674 | * Runs in constant time, unless an input is the point at infinity (which
|
---|
1675 | * normally shouldn't happen).
|
---|
1676 | */
|
---|
1677 | ossl_ec_GFp_nistp_points_make_affine_internal(num,
|
---|
1678 | points,
|
---|
1679 | sizeof(felem),
|
---|
1680 | tmp_felems,
|
---|
1681 | (void (*)(void *))felem_one,
|
---|
1682 | felem_is_zero_int,
|
---|
1683 | (void (*)(void *, const void *))
|
---|
1684 | felem_assign,
|
---|
1685 | (void (*)(void *, const void *))
|
---|
1686 | felem_square_reduce,
|
---|
1687 | (void (*)(void *, const void *, const void*))
|
---|
1688 | felem_mul_reduce,
|
---|
1689 | (void (*)(void *, const void *))
|
---|
1690 | felem_inv,
|
---|
1691 | (void (*)(void *, const void *))
|
---|
1692 | felem_contract);
|
---|
1693 | }
|
---|
1694 |
|
---|
1695 | /*
|
---|
1696 | * Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
|
---|
1697 | * values Result is stored in r (r can equal one of the inputs).
|
---|
1698 | */
|
---|
1699 | int ossl_ec_GFp_nistp384_points_mul(const EC_GROUP *group, EC_POINT *r,
|
---|
1700 | const BIGNUM *scalar, size_t num,
|
---|
1701 | const EC_POINT *points[],
|
---|
1702 | const BIGNUM *scalars[], BN_CTX *ctx)
|
---|
1703 | {
|
---|
1704 | int ret = 0;
|
---|
1705 | int j;
|
---|
1706 | int mixed = 0;
|
---|
1707 | BIGNUM *x, *y, *z, *tmp_scalar;
|
---|
1708 | felem_bytearray g_secret;
|
---|
1709 | felem_bytearray *secrets = NULL;
|
---|
1710 | felem (*pre_comp)[17][3] = NULL;
|
---|
1711 | felem *tmp_felems = NULL;
|
---|
1712 | unsigned int i;
|
---|
1713 | int num_bytes;
|
---|
1714 | int have_pre_comp = 0;
|
---|
1715 | size_t num_points = num;
|
---|
1716 | felem x_in, y_in, z_in, x_out, y_out, z_out;
|
---|
1717 | NISTP384_PRE_COMP *pre = NULL;
|
---|
1718 | felem(*g_pre_comp)[3] = NULL;
|
---|
1719 | EC_POINT *generator = NULL;
|
---|
1720 | const EC_POINT *p = NULL;
|
---|
1721 | const BIGNUM *p_scalar = NULL;
|
---|
1722 |
|
---|
1723 | BN_CTX_start(ctx);
|
---|
1724 | x = BN_CTX_get(ctx);
|
---|
1725 | y = BN_CTX_get(ctx);
|
---|
1726 | z = BN_CTX_get(ctx);
|
---|
1727 | tmp_scalar = BN_CTX_get(ctx);
|
---|
1728 | if (tmp_scalar == NULL)
|
---|
1729 | goto err;
|
---|
1730 |
|
---|
1731 | if (scalar != NULL) {
|
---|
1732 | pre = group->pre_comp.nistp384;
|
---|
1733 | if (pre)
|
---|
1734 | /* we have precomputation, try to use it */
|
---|
1735 | g_pre_comp = &pre->g_pre_comp[0];
|
---|
1736 | else
|
---|
1737 | /* try to use the standard precomputation */
|
---|
1738 | g_pre_comp = (felem(*)[3]) gmul;
|
---|
1739 | generator = EC_POINT_new(group);
|
---|
1740 | if (generator == NULL)
|
---|
1741 | goto err;
|
---|
1742 | /* get the generator from precomputation */
|
---|
1743 | if (!felem_to_BN(x, g_pre_comp[1][0]) ||
|
---|
1744 | !felem_to_BN(y, g_pre_comp[1][1]) ||
|
---|
1745 | !felem_to_BN(z, g_pre_comp[1][2])) {
|
---|
1746 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
1747 | goto err;
|
---|
1748 | }
|
---|
1749 | if (!ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group,
|
---|
1750 | generator,
|
---|
1751 | x, y, z, ctx))
|
---|
1752 | goto err;
|
---|
1753 | if (0 == EC_POINT_cmp(group, generator, group->generator, ctx))
|
---|
1754 | /* precomputation matches generator */
|
---|
1755 | have_pre_comp = 1;
|
---|
1756 | else
|
---|
1757 | /*
|
---|
1758 | * we don't have valid precomputation: treat the generator as a
|
---|
1759 | * random point
|
---|
1760 | */
|
---|
1761 | num_points++;
|
---|
1762 | }
|
---|
1763 |
|
---|
1764 | if (num_points > 0) {
|
---|
1765 | if (num_points >= 2) {
|
---|
1766 | /*
|
---|
1767 | * unless we precompute multiples for just one point, converting
|
---|
1768 | * those into affine form is time well spent
|
---|
1769 | */
|
---|
1770 | mixed = 1;
|
---|
1771 | }
|
---|
1772 | secrets = OPENSSL_zalloc(sizeof(*secrets) * num_points);
|
---|
1773 | pre_comp = OPENSSL_zalloc(sizeof(*pre_comp) * num_points);
|
---|
1774 | if (mixed)
|
---|
1775 | tmp_felems =
|
---|
1776 | OPENSSL_malloc(sizeof(*tmp_felems) * (num_points * 17 + 1));
|
---|
1777 | if ((secrets == NULL) || (pre_comp == NULL)
|
---|
1778 | || (mixed && (tmp_felems == NULL)))
|
---|
1779 | goto err;
|
---|
1780 |
|
---|
1781 | /*
|
---|
1782 | * we treat NULL scalars as 0, and NULL points as points at infinity,
|
---|
1783 | * i.e., they contribute nothing to the linear combination
|
---|
1784 | */
|
---|
1785 | for (i = 0; i < num_points; ++i) {
|
---|
1786 | if (i == num) {
|
---|
1787 | /*
|
---|
1788 | * we didn't have a valid precomputation, so we pick the
|
---|
1789 | * generator
|
---|
1790 | */
|
---|
1791 | p = EC_GROUP_get0_generator(group);
|
---|
1792 | p_scalar = scalar;
|
---|
1793 | } else {
|
---|
1794 | /* the i^th point */
|
---|
1795 | p = points[i];
|
---|
1796 | p_scalar = scalars[i];
|
---|
1797 | }
|
---|
1798 | if (p_scalar != NULL && p != NULL) {
|
---|
1799 | /* reduce scalar to 0 <= scalar < 2^384 */
|
---|
1800 | if ((BN_num_bits(p_scalar) > 384)
|
---|
1801 | || (BN_is_negative(p_scalar))) {
|
---|
1802 | /*
|
---|
1803 | * this is an unusual input, and we don't guarantee
|
---|
1804 | * constant-timeness
|
---|
1805 | */
|
---|
1806 | if (!BN_nnmod(tmp_scalar, p_scalar, group->order, ctx)) {
|
---|
1807 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
1808 | goto err;
|
---|
1809 | }
|
---|
1810 | num_bytes = BN_bn2lebinpad(tmp_scalar,
|
---|
1811 | secrets[i], sizeof(secrets[i]));
|
---|
1812 | } else {
|
---|
1813 | num_bytes = BN_bn2lebinpad(p_scalar,
|
---|
1814 | secrets[i], sizeof(secrets[i]));
|
---|
1815 | }
|
---|
1816 | if (num_bytes < 0) {
|
---|
1817 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
1818 | goto err;
|
---|
1819 | }
|
---|
1820 | /* precompute multiples */
|
---|
1821 | if ((!BN_to_felem(x_out, p->X)) ||
|
---|
1822 | (!BN_to_felem(y_out, p->Y)) ||
|
---|
1823 | (!BN_to_felem(z_out, p->Z)))
|
---|
1824 | goto err;
|
---|
1825 | memcpy(pre_comp[i][1][0], x_out, sizeof(felem));
|
---|
1826 | memcpy(pre_comp[i][1][1], y_out, sizeof(felem));
|
---|
1827 | memcpy(pre_comp[i][1][2], z_out, sizeof(felem));
|
---|
1828 | for (j = 2; j <= 16; ++j) {
|
---|
1829 | if (j & 1) {
|
---|
1830 | point_add(pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
|
---|
1831 | pre_comp[i][1][0], pre_comp[i][1][1], pre_comp[i][1][2], 0,
|
---|
1832 | pre_comp[i][j - 1][0], pre_comp[i][j - 1][1], pre_comp[i][j - 1][2]);
|
---|
1833 | } else {
|
---|
1834 | point_double(pre_comp[i][j][0], pre_comp[i][j][1], pre_comp[i][j][2],
|
---|
1835 | pre_comp[i][j / 2][0], pre_comp[i][j / 2][1], pre_comp[i][j / 2][2]);
|
---|
1836 | }
|
---|
1837 | }
|
---|
1838 | }
|
---|
1839 | }
|
---|
1840 | if (mixed)
|
---|
1841 | make_points_affine(num_points * 17, pre_comp[0], tmp_felems);
|
---|
1842 | }
|
---|
1843 |
|
---|
1844 | /* the scalar for the generator */
|
---|
1845 | if (scalar != NULL && have_pre_comp) {
|
---|
1846 | memset(g_secret, 0, sizeof(g_secret));
|
---|
1847 | /* reduce scalar to 0 <= scalar < 2^384 */
|
---|
1848 | if ((BN_num_bits(scalar) > 384) || (BN_is_negative(scalar))) {
|
---|
1849 | /*
|
---|
1850 | * this is an unusual input, and we don't guarantee
|
---|
1851 | * constant-timeness
|
---|
1852 | */
|
---|
1853 | if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) {
|
---|
1854 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
1855 | goto err;
|
---|
1856 | }
|
---|
1857 | num_bytes = BN_bn2lebinpad(tmp_scalar, g_secret, sizeof(g_secret));
|
---|
1858 | } else {
|
---|
1859 | num_bytes = BN_bn2lebinpad(scalar, g_secret, sizeof(g_secret));
|
---|
1860 | }
|
---|
1861 | /* do the multiplication with generator precomputation */
|
---|
1862 | batch_mul(x_out, y_out, z_out,
|
---|
1863 | (const felem_bytearray(*))secrets, num_points,
|
---|
1864 | g_secret,
|
---|
1865 | mixed, (const felem(*)[17][3])pre_comp,
|
---|
1866 | (const felem(*)[3])g_pre_comp);
|
---|
1867 | } else {
|
---|
1868 | /* do the multiplication without generator precomputation */
|
---|
1869 | batch_mul(x_out, y_out, z_out,
|
---|
1870 | (const felem_bytearray(*))secrets, num_points,
|
---|
1871 | NULL, mixed, (const felem(*)[17][3])pre_comp, NULL);
|
---|
1872 | }
|
---|
1873 | /* reduce the output to its unique minimal representation */
|
---|
1874 | felem_contract(x_in, x_out);
|
---|
1875 | felem_contract(y_in, y_out);
|
---|
1876 | felem_contract(z_in, z_out);
|
---|
1877 | if ((!felem_to_BN(x, x_in)) || (!felem_to_BN(y, y_in)) ||
|
---|
1878 | (!felem_to_BN(z, z_in))) {
|
---|
1879 | ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB);
|
---|
1880 | goto err;
|
---|
1881 | }
|
---|
1882 | ret = ossl_ec_GFp_simple_set_Jprojective_coordinates_GFp(group, r, x, y, z,
|
---|
1883 | ctx);
|
---|
1884 |
|
---|
1885 | err:
|
---|
1886 | BN_CTX_end(ctx);
|
---|
1887 | EC_POINT_free(generator);
|
---|
1888 | OPENSSL_free(secrets);
|
---|
1889 | OPENSSL_free(pre_comp);
|
---|
1890 | OPENSSL_free(tmp_felems);
|
---|
1891 | return ret;
|
---|
1892 | }
|
---|
1893 |
|
---|
1894 | int ossl_ec_GFp_nistp384_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
|
---|
1895 | {
|
---|
1896 | int ret = 0;
|
---|
1897 | NISTP384_PRE_COMP *pre = NULL;
|
---|
1898 | int i, j;
|
---|
1899 | BIGNUM *x, *y;
|
---|
1900 | EC_POINT *generator = NULL;
|
---|
1901 | felem tmp_felems[16];
|
---|
1902 | #ifndef FIPS_MODULE
|
---|
1903 | BN_CTX *new_ctx = NULL;
|
---|
1904 | #endif
|
---|
1905 |
|
---|
1906 | /* throw away old precomputation */
|
---|
1907 | EC_pre_comp_free(group);
|
---|
1908 |
|
---|
1909 | #ifndef FIPS_MODULE
|
---|
1910 | if (ctx == NULL)
|
---|
1911 | ctx = new_ctx = BN_CTX_new();
|
---|
1912 | #endif
|
---|
1913 | if (ctx == NULL)
|
---|
1914 | return 0;
|
---|
1915 |
|
---|
1916 | BN_CTX_start(ctx);
|
---|
1917 | x = BN_CTX_get(ctx);
|
---|
1918 | y = BN_CTX_get(ctx);
|
---|
1919 | if (y == NULL)
|
---|
1920 | goto err;
|
---|
1921 | /* get the generator */
|
---|
1922 | if (group->generator == NULL)
|
---|
1923 | goto err;
|
---|
1924 | generator = EC_POINT_new(group);
|
---|
1925 | if (generator == NULL)
|
---|
1926 | goto err;
|
---|
1927 | BN_bin2bn(nistp384_curve_params[3], sizeof(felem_bytearray), x);
|
---|
1928 | BN_bin2bn(nistp384_curve_params[4], sizeof(felem_bytearray), y);
|
---|
1929 | if (!EC_POINT_set_affine_coordinates(group, generator, x, y, ctx))
|
---|
1930 | goto err;
|
---|
1931 | if ((pre = nistp384_pre_comp_new()) == NULL)
|
---|
1932 | goto err;
|
---|
1933 | /*
|
---|
1934 | * if the generator is the standard one, use built-in precomputation
|
---|
1935 | */
|
---|
1936 | if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
|
---|
1937 | memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
|
---|
1938 | goto done;
|
---|
1939 | }
|
---|
1940 | if ((!BN_to_felem(pre->g_pre_comp[1][0], group->generator->X)) ||
|
---|
1941 | (!BN_to_felem(pre->g_pre_comp[1][1], group->generator->Y)) ||
|
---|
1942 | (!BN_to_felem(pre->g_pre_comp[1][2], group->generator->Z)))
|
---|
1943 | goto err;
|
---|
1944 | /* compute 2^95*G, 2^190*G, 2^285*G */
|
---|
1945 | for (i = 1; i <= 4; i <<= 1) {
|
---|
1946 | point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2],
|
---|
1947 | pre->g_pre_comp[i][0], pre->g_pre_comp[i][1], pre->g_pre_comp[i][2]);
|
---|
1948 | for (j = 0; j < 94; ++j) {
|
---|
1949 | point_double(pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2],
|
---|
1950 | pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2]);
|
---|
1951 | }
|
---|
1952 | }
|
---|
1953 | /* g_pre_comp[0] is the point at infinity */
|
---|
1954 | memset(pre->g_pre_comp[0], 0, sizeof(pre->g_pre_comp[0]));
|
---|
1955 | /* the remaining multiples */
|
---|
1956 | /* 2^95*G + 2^190*G */
|
---|
1957 | point_add(pre->g_pre_comp[6][0], pre->g_pre_comp[6][1], pre->g_pre_comp[6][2],
|
---|
1958 | pre->g_pre_comp[4][0], pre->g_pre_comp[4][1], pre->g_pre_comp[4][2], 0,
|
---|
1959 | pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], pre->g_pre_comp[2][2]);
|
---|
1960 | /* 2^95*G + 2^285*G */
|
---|
1961 | point_add(pre->g_pre_comp[10][0], pre->g_pre_comp[10][1], pre->g_pre_comp[10][2],
|
---|
1962 | pre->g_pre_comp[8][0], pre->g_pre_comp[8][1], pre->g_pre_comp[8][2], 0,
|
---|
1963 | pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], pre->g_pre_comp[2][2]);
|
---|
1964 | /* 2^190*G + 2^285*G */
|
---|
1965 | point_add(pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], pre->g_pre_comp[12][2],
|
---|
1966 | pre->g_pre_comp[8][0], pre->g_pre_comp[8][1], pre->g_pre_comp[8][2], 0,
|
---|
1967 | pre->g_pre_comp[4][0], pre->g_pre_comp[4][1], pre->g_pre_comp[4][2]);
|
---|
1968 | /* 2^95*G + 2^190*G + 2^285*G */
|
---|
1969 | point_add(pre->g_pre_comp[14][0], pre->g_pre_comp[14][1], pre->g_pre_comp[14][2],
|
---|
1970 | pre->g_pre_comp[12][0], pre->g_pre_comp[12][1], pre->g_pre_comp[12][2], 0,
|
---|
1971 | pre->g_pre_comp[2][0], pre->g_pre_comp[2][1], pre->g_pre_comp[2][2]);
|
---|
1972 | for (i = 1; i < 8; ++i) {
|
---|
1973 | /* odd multiples: add G */
|
---|
1974 | point_add(pre->g_pre_comp[2 * i + 1][0], pre->g_pre_comp[2 * i + 1][1], pre->g_pre_comp[2 * i + 1][2],
|
---|
1975 | pre->g_pre_comp[2 * i][0], pre->g_pre_comp[2 * i][1], pre->g_pre_comp[2 * i][2], 0,
|
---|
1976 | pre->g_pre_comp[1][0], pre->g_pre_comp[1][1], pre->g_pre_comp[1][2]);
|
---|
1977 | }
|
---|
1978 | make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
|
---|
1979 |
|
---|
1980 | done:
|
---|
1981 | SETPRECOMP(group, nistp384, pre);
|
---|
1982 | ret = 1;
|
---|
1983 | pre = NULL;
|
---|
1984 | err:
|
---|
1985 | BN_CTX_end(ctx);
|
---|
1986 | EC_POINT_free(generator);
|
---|
1987 | #ifndef FIPS_MODULE
|
---|
1988 | BN_CTX_free(new_ctx);
|
---|
1989 | #endif
|
---|
1990 | ossl_ec_nistp384_pre_comp_free(pre);
|
---|
1991 | return ret;
|
---|
1992 | }
|
---|
1993 |
|
---|
1994 | int ossl_ec_GFp_nistp384_have_precompute_mult(const EC_GROUP *group)
|
---|
1995 | {
|
---|
1996 | return HAVEPRECOMP(group, nistp384);
|
---|
1997 | }
|
---|