1 // Copyright 2017 Google Inc.
\r
3 // Licensed under the Apache License, Version 2.0 (the "License");
\r
4 // you may not use this file except in compliance with the License.
\r
5 // You may obtain a copy of the License at
\r
7 // http://www.apache.org/licenses/LICENSE-2.0
\r
9 // Unless required by applicable law or agreed to in writing, software
\r
10 // distributed under the License is distributed on an "AS IS" BASIS,
\r
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
12 // See the License for the specific language governing permissions and
\r
13 // limitations under the License.
\r
15 ////////////////////////////////////////////////////////////////////////////////
\r
17 package com.google.crypto.tink.subtle;
\r
19 import com.google.crypto.tink.annotations.Alpha;
\r
21 import java.util.Arrays;
\r
24 * Defines field 25519 function based on <a
\r
25 * href="https://github.com/agl/curve25519-donna/blob/master/curve25519-donna.c">curve25519-donna C
\r
26 * implementation</a> (mostly identical).
\r
28 * <p>Field elements are written as an array of signed, 64-bit limbs (an array of longs), least
\r
29 * significant first. The value of the field element is:
\r
32 * x[0] + 2^26·x[1] + 2^51·x[2] + 2^77·x[3] + 2^102·x[4] + 2^128·x[5] + 2^153·x[6] + 2^179·x[7] +
\r
33 * 2^204·x[8] + 2^230·x[9],
\r
36 * <p>i.e. the limbs are 26, 25, 26, 25, ... bits wide.
\r
39 final class Field25519 {
\r
41 * During Field25519 computation, the mixed radix representation may be in different forms:
\r
43 * <li> Reduced-size form: the array has size at most 10.
\r
44 * <li> Non-reduced-size form: the array is not reduced modulo 2^255 - 19 and has size at most
\r
50 * <li> Clarify ill-defined terminologies.
\r
51 * <li> The reduction procedure is different from DJB's paper
\r
52 * (http://cr.yp.to/ecdh/curve25519-20060209.pdf). The coefficients after reducing degree and
\r
53 * reducing coefficients aren't guaranteed to be in range {-2^25, ..., 2^25}. We should check to
\r
54 * see what's going on.
\r
55 * <li> Consider using method mult() everywhere and making product() private.
\r
59 static final int FIELD_LEN = 32;
\r
60 static final int LIMB_CNT = 10;
\r
61 private static final long TWO_TO_25 = 1 << 25;
\r
62 private static final long TWO_TO_26 = TWO_TO_25 << 1;
\r
64 private static final int[] EXPAND_START = {0, 3, 6, 9, 12, 16, 19, 22, 25, 28};
\r
65 private static final int[] EXPAND_SHIFT = {0, 2, 3, 5, 6, 0, 1, 3, 4, 6};
\r
66 private static final int[] MASK = {0x3ffffff, 0x1ffffff};
\r
67 private static final int[] SHIFT = {26, 25};
\r
70 * Sums two numbers: output = in1 + in2
\r
72 * On entry: in1, in2 are in reduced-size form.
\r
74 static void sum(long[] output, long[] in1, long[] in2) {
\r
75 for (int i = 0; i < LIMB_CNT; i++) {
\r
76 output[i] = in1[i] + in2[i];
\r
81 * Sums two numbers: output += in
\r
83 * On entry: in is in reduced-size form.
\r
85 static void sum(long[] output, long[] in) {
\r
86 sum(output, output, in);
\r
90 * Find the difference of two numbers: output = in1 - in2
\r
91 * (note the order of the arguments!).
\r
93 * On entry: in1, in2 are in reduced-size form.
\r
95 static void sub(long[] output, long[] in1, long[] in2) {
\r
96 for (int i = 0; i < LIMB_CNT; i++) {
\r
97 output[i] = in1[i] - in2[i];
\r
102 * Find the difference of two numbers: output = in - output
\r
103 * (note the order of the arguments!).
\r
105 * On entry: in, output are in reduced-size form.
\r
107 static void sub(long[] output, long[] in) {
\r
108 sub(output, in, output);
\r
112 * Multiply a number by a scalar: output = in * scalar
\r
114 static void scalarProduct(long[] output, long[] in, long scalar) {
\r
115 for (int i = 0; i < LIMB_CNT; i++) {
\r
116 output[i] = in[i] * scalar;
\r
121 * Multiply two numbers: out = in2 * in
\r
123 * output must be distinct to both inputs. The inputs are reduced coefficient form,
\r
124 * the output is not.
\r
126 * out[x] <= 14 * the largest product of the input limbs.
\r
128 static void product(long[] out, long[] in2, long[] in) {
\r
129 out[0] = in2[0] * in[0];
\r
130 out[1] = in2[0] * in[1]
\r
132 out[2] = 2 * in2[1] * in[1]
\r
135 out[3] = in2[1] * in[2]
\r
139 out[4] = in2[2] * in[2]
\r
140 + 2 * (in2[1] * in[3] + in2[3] * in[1])
\r
143 out[5] = in2[2] * in[3]
\r
149 out[6] = 2 * (in2[3] * in[3] + in2[1] * in[5] + in2[5] * in[1])
\r
154 out[7] = in2[3] * in[4]
\r
162 out[8] = in2[4] * in[4]
\r
163 + 2 * (in2[3] * in[5] + in2[5] * in[3] + in2[1] * in[7] + in2[7] * in[1])
\r
168 out[9] = in2[4] * in[5]
\r
179 2 * (in2[5] * in[5] + in2[3] * in[7] + in2[7] * in[3] + in2[1] * in[9] + in2[9] * in[1])
\r
184 out[11] = in2[5] * in[6]
\r
192 out[12] = in2[6] * in[6]
\r
193 + 2 * (in2[5] * in[7] + in2[7] * in[5] + in2[3] * in[9] + in2[9] * in[3])
\r
196 out[13] = in2[6] * in[7]
\r
202 out[14] = 2 * (in2[7] * in[7] + in2[5] * in[9] + in2[9] * in[5])
\r
205 out[15] = in2[7] * in[8]
\r
209 out[16] = in2[8] * in[8]
\r
210 + 2 * (in2[7] * in[9] + in2[9] * in[7]);
\r
211 out[17] = in2[8] * in[9]
\r
213 out[18] = 2 * in2[9] * in[9];
\r
217 * Reduce a long form to a reduced-size form by taking the input mod 2^255 - 19.
\r
219 * On entry: |output[i]| < 14*2^54
\r
220 * On exit: |output[0..8]| < 280*2^54
\r
222 static void reduceSizeByModularReduction(long[] output) {
\r
223 // The coefficients x[10], x[11],..., x[18] are eliminated by reduction modulo 2^255 - 19.
\r
224 // For example, the coefficient x[18] is multiplied by 19 and added to the coefficient x[8].
\r
226 // Each of these shifts and adds ends up multiplying the value by 19.
\r
228 // For output[0..8], the absolute entry value is < 14*2^54 and we add, at most, 19*14*2^54 thus,
\r
229 // on exit, |output[0..8]| < 280*2^54.
\r
230 output[8] += output[18] << 4;
\r
231 output[8] += output[18] << 1;
\r
232 output[8] += output[18];
\r
233 output[7] += output[17] << 4;
\r
234 output[7] += output[17] << 1;
\r
235 output[7] += output[17];
\r
236 output[6] += output[16] << 4;
\r
237 output[6] += output[16] << 1;
\r
238 output[6] += output[16];
\r
239 output[5] += output[15] << 4;
\r
240 output[5] += output[15] << 1;
\r
241 output[5] += output[15];
\r
242 output[4] += output[14] << 4;
\r
243 output[4] += output[14] << 1;
\r
244 output[4] += output[14];
\r
245 output[3] += output[13] << 4;
\r
246 output[3] += output[13] << 1;
\r
247 output[3] += output[13];
\r
248 output[2] += output[12] << 4;
\r
249 output[2] += output[12] << 1;
\r
250 output[2] += output[12];
\r
251 output[1] += output[11] << 4;
\r
252 output[1] += output[11] << 1;
\r
253 output[1] += output[11];
\r
254 output[0] += output[10] << 4;
\r
255 output[0] += output[10] << 1;
\r
256 output[0] += output[10];
\r
260 * Reduce all coefficients of the short form input so that |x| < 2^26.
\r
262 * On entry: |output[i]| < 280*2^54
\r
264 static void reduceCoefficients(long[] output) {
\r
267 for (int i = 0; i < LIMB_CNT; i += 2) {
\r
268 long over = output[i] / TWO_TO_26;
\r
269 // The entry condition (that |output[i]| < 280*2^54) means that over is, at most, 280*2^28 in
\r
270 // the first iteration of this loop. This is added to the next limb and we can approximate the
\r
271 // resulting bound of that limb by 281*2^54.
\r
272 output[i] -= over << 26;
\r
273 output[i + 1] += over;
\r
275 // For the first iteration, |output[i+1]| < 281*2^54, thus |over| < 281*2^29. When this is
\r
276 // added to the next limb, the resulting bound can be approximated as 281*2^54.
\r
278 // For subsequent iterations of the loop, 281*2^54 remains a conservative bound and no
\r
279 // overflow occurs.
\r
280 over = output[i + 1] / TWO_TO_25;
\r
281 output[i + 1] -= over << 25;
\r
282 output[i + 2] += over;
\r
284 // Now |output[10]| < 281*2^29 and all other coefficients are reduced.
\r
285 output[0] += output[10] << 4;
\r
286 output[0] += output[10] << 1;
\r
287 output[0] += output[10];
\r
290 // Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29 so |over| will be no more
\r
292 long over = output[0] / TWO_TO_26;
\r
293 output[0] -= over << 26;
\r
295 // Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 2^16 < 2^26. The bound on
\r
296 // |output[1]| is sufficient to meet our needs.
\r
300 * A helpful wrapper around {@ref Field25519#product}: output = in * in2.
\r
302 * On entry: |in[i]| < 2^27 and |in2[i]| < 2^27.
\r
304 * The output is reduced degree (indeed, one need only provide storage for 10 limbs) and
\r
305 * |output[i]| < 2^26.
\r
307 static void mult(long[] output, long[] in, long[] in2) {
\r
308 long[] t = new long[19];
\r
309 product(t, in, in2);
\r
310 // |t[i]| < 14*2^54
\r
311 reduceSizeByModularReduction(t);
\r
312 reduceCoefficients(t);
\r
314 System.arraycopy(t, 0, output, 0, LIMB_CNT);
\r
318 * Square a number: out = in**2
\r
320 * output must be distinct from the input. The inputs are reduced coefficient form, the output is
\r
323 * out[x] <= 14 * the largest product of the input limbs.
\r
325 private static void squareInner(long[] out, long[] in) {
\r
326 out[0] = in[0] * in[0];
\r
327 out[1] = 2 * in[0] * in[1];
\r
328 out[2] = 2 * (in[1] * in[1] + in[0] * in[2]);
\r
329 out[3] = 2 * (in[1] * in[2] + in[0] * in[3]);
\r
330 out[4] = in[2] * in[2]
\r
331 + 4 * in[1] * in[3]
\r
332 + 2 * in[0] * in[4];
\r
333 out[5] = 2 * (in[2] * in[3] + in[1] * in[4] + in[0] * in[5]);
\r
334 out[6] = 2 * (in[3] * in[3] + in[2] * in[4] + in[0] * in[6] + 2 * in[1] * in[5]);
\r
335 out[7] = 2 * (in[3] * in[4] + in[2] * in[5] + in[1] * in[6] + in[0] * in[7]);
\r
336 out[8] = in[4] * in[4]
\r
337 + 2 * (in[2] * in[6] + in[0] * in[8] + 2 * (in[1] * in[7] + in[3] * in[5]));
\r
338 out[9] = 2 * (in[4] * in[5] + in[3] * in[6] + in[2] * in[7] + in[1] * in[8] + in[0] * in[9]);
\r
339 out[10] = 2 * (in[5] * in[5]
\r
342 + 2 * (in[3] * in[7] + in[1] * in[9]));
\r
343 out[11] = 2 * (in[5] * in[6] + in[4] * in[7] + in[3] * in[8] + in[2] * in[9]);
\r
344 out[12] = in[6] * in[6]
\r
345 + 2 * (in[4] * in[8] + 2 * (in[5] * in[7] + in[3] * in[9]));
\r
346 out[13] = 2 * (in[6] * in[7] + in[5] * in[8] + in[4] * in[9]);
\r
347 out[14] = 2 * (in[7] * in[7] + in[6] * in[8] + 2 * in[5] * in[9]);
\r
348 out[15] = 2 * (in[7] * in[8] + in[6] * in[9]);
\r
349 out[16] = in[8] * in[8] + 4 * in[7] * in[9];
\r
350 out[17] = 2 * in[8] * in[9];
\r
351 out[18] = 2 * in[9] * in[9];
\r
357 * On entry: The |in| argument is in reduced coefficients form and |in[i]| < 2^27.
\r
359 * On exit: The |output| argument is in reduced coefficients form (indeed, one need only provide
\r
360 * storage for 10 limbs) and |out[i]| < 2^26.
\r
362 static void square(long[] output, long[] in) {
\r
363 long[] t = new long[19];
\r
364 squareInner(t, in);
\r
365 // |t[i]| < 14*2^54 because the largest product of two limbs will be < 2^(27+27) and SquareInner
\r
366 // adds together, at most, 14 of those products.
\r
367 reduceSizeByModularReduction(t);
\r
368 reduceCoefficients(t);
\r
370 System.arraycopy(t, 0, output, 0, LIMB_CNT);
\r
374 * Takes a little-endian, 32-byte number and expands it into mixed radix form.
\r
376 static long[] expand(byte[] input) {
\r
377 long[] output = new long[LIMB_CNT];
\r
378 for (int i = 0; i < LIMB_CNT; i++) {
\r
379 output[i] = ((((long) (input[EXPAND_START[i]] & 0xff))
\r
380 | ((long) (input[EXPAND_START[i] + 1] & 0xff)) << 8
\r
381 | ((long) (input[EXPAND_START[i] + 2] & 0xff)) << 16
\r
382 | ((long) (input[EXPAND_START[i] + 3] & 0xff)) << 24) >> EXPAND_SHIFT[i]) & MASK[i & 1];
\r
388 * Takes a fully reduced mixed radix form number and contract it into a little-endian, 32-byte
\r
391 * On entry: |input_limbs[i]| < 2^26
\r
393 @SuppressWarnings("NarrowingCompoundAssignment")
\r
394 static byte[] contract(long[] inputLimbs) {
\r
395 long[] input = Arrays.copyOf(inputLimbs, LIMB_CNT);
\r
396 for (int j = 0; j < 2; j++) {
\r
397 for (int i = 0; i < 9; i++) {
\r
398 // This calculation is a time-invariant way to make input[i] non-negative by borrowing
\r
399 // from the next-larger limb.
\r
400 int carry = -(int) ((input[i] & (input[i] >> 31)) >> SHIFT[i & 1]);
\r
401 input[i] = input[i] + (carry << SHIFT[i & 1]);
\r
402 input[i + 1] -= carry;
\r
405 // There's no greater limb for input[9] to borrow from, but we can multiply by 19 and borrow
\r
406 // from input[0], which is valid mod 2^255-19.
\r
408 int carry = -(int) ((input[9] & (input[9] >> 31)) >> 25);
\r
409 input[9] += (carry << 25);
\r
410 input[0] -= (carry * 19);
\r
413 // After the first iteration, input[1..9] are non-negative and fit within 25 or 26 bits,
\r
414 // depending on position. However, input[0] may be negative.
\r
417 // The first borrow-propagation pass above ended with every limb except (possibly) input[0]
\r
420 // If input[0] was negative after the first pass, then it was because of a carry from input[9].
\r
421 // On entry, input[9] < 2^26 so the carry was, at most, one, since (2**26-1) >> 25 = 1. Thus
\r
422 // input[0] >= -19.
\r
424 // In the second pass, each limb is decreased by at most one. Thus the second borrow-propagation
\r
425 // pass could only have wrapped around to decrease input[0] again if the first pass left
\r
426 // input[0] negative *and* input[1] through input[9] were all zero. In that case, input[1] is
\r
427 // now 2^25 - 1, and this last borrow-propagation step will leave input[1] non-negative.
\r
429 int carry = -(int) ((input[0] & (input[0] >> 31)) >> 26);
\r
430 input[0] += (carry << 26);
\r
434 // All input[i] are now non-negative. However, there might be values between 2^25 and 2^26 in a
\r
435 // limb which is, nominally, 25 bits wide.
\r
436 for (int j = 0; j < 2; j++) {
\r
437 for (int i = 0; i < 9; i++) {
\r
438 int carry = (int) (input[i] >> SHIFT[i & 1]);
\r
439 input[i] &= MASK[i & 1];
\r
440 input[i + 1] += carry;
\r
445 int carry = (int) (input[9] >> 25);
\r
446 input[9] &= 0x1ffffff;
\r
447 input[0] += 19 * carry;
\r
450 // If the first carry-chain pass, just above, ended up with a carry from input[9], and that
\r
451 // caused input[0] to be out-of-bounds, then input[0] was < 2^26 + 2*19, because the carry was,
\r
454 // If the second pass carried from input[9] again then input[0] is < 2*19 and the input[9] ->
\r
455 // input[0] carry didn't push input[0] out of bounds.
\r
457 // It still remains the case that input might be between 2^255-19 and 2^255. In this case,
\r
458 // input[1..9] must take their maximum value and input[0] must be >= (2^255-19) & 0x3ffffff,
\r
459 // which is 0x3ffffed.
\r
460 int mask = gte((int) input[0], 0x3ffffed);
\r
461 for (int i = 1; i < LIMB_CNT; i++) {
\r
462 mask &= eq((int) input[i], MASK[i & 1]);
\r
465 // mask is either 0xffffffff (if input >= 2^255-19) and zero otherwise. Thus this conditionally
\r
466 // subtracts 2^255-19.
\r
467 input[0] -= mask & 0x3ffffed;
\r
468 input[1] -= mask & 0x1ffffff;
\r
469 for (int i = 2; i < LIMB_CNT; i += 2) {
\r
470 input[i] -= mask & 0x3ffffff;
\r
471 input[i + 1] -= mask & 0x1ffffff;
\r
474 for (int i = 0; i < LIMB_CNT; i++) {
\r
475 input[i] <<= EXPAND_SHIFT[i];
\r
477 byte[] output = new byte[FIELD_LEN];
\r
478 for (int i = 0; i < LIMB_CNT; i++) {
\r
479 output[EXPAND_START[i]] |= input[i] & 0xff;
\r
480 output[EXPAND_START[i] + 1] |= (input[i] >> 8) & 0xff;
\r
481 output[EXPAND_START[i] + 2] |= (input[i] >> 16) & 0xff;
\r
482 output[EXPAND_START[i] + 3] |= (input[i] >> 24) & 0xff;
\r
488 * Computes inverse of z = z(2^255 - 21)
\r
490 * Shamelessly copied from agl's code which was shamelessly copied from djb's code. Only the
\r
491 * comment format and the variable namings are different from those.
\r
493 static void inverse(long[] out, long[] z) {
\r
494 long[] z2 = new long[Field25519.LIMB_CNT];
\r
495 long[] z9 = new long[Field25519.LIMB_CNT];
\r
496 long[] z11 = new long[Field25519.LIMB_CNT];
\r
497 long[] z2To5Minus1 = new long[Field25519.LIMB_CNT];
\r
498 long[] z2To10Minus1 = new long[Field25519.LIMB_CNT];
\r
499 long[] z2To20Minus1 = new long[Field25519.LIMB_CNT];
\r
500 long[] z2To50Minus1 = new long[Field25519.LIMB_CNT];
\r
501 long[] z2To100Minus1 = new long[Field25519.LIMB_CNT];
\r
502 long[] t0 = new long[Field25519.LIMB_CNT];
\r
503 long[] t1 = new long[Field25519.LIMB_CNT];
\r
505 square(z2, z); // 2
\r
506 square(t1, z2); // 4
\r
507 square(t0, t1); // 8
\r
508 mult(z9, t0, z); // 9
\r
509 mult(z11, z9, z2); // 11
\r
510 square(t0, z11); // 22
\r
511 mult(z2To5Minus1, t0, z9); // 2^5 - 2^0 = 31
\r
513 square(t0, z2To5Minus1); // 2^6 - 2^1
\r
514 square(t1, t0); // 2^7 - 2^2
\r
515 square(t0, t1); // 2^8 - 2^3
\r
516 square(t1, t0); // 2^9 - 2^4
\r
517 square(t0, t1); // 2^10 - 2^5
\r
518 mult(z2To10Minus1, t0, z2To5Minus1); // 2^10 - 2^0
\r
520 square(t0, z2To10Minus1); // 2^11 - 2^1
\r
521 square(t1, t0); // 2^12 - 2^2
\r
522 for (int i = 2; i < 10; i += 2) { // 2^20 - 2^10
\r
526 mult(z2To20Minus1, t1, z2To10Minus1); // 2^20 - 2^0
\r
528 square(t0, z2To20Minus1); // 2^21 - 2^1
\r
529 square(t1, t0); // 2^22 - 2^2
\r
530 for (int i = 2; i < 20; i += 2) { // 2^40 - 2^20
\r
534 mult(t0, t1, z2To20Minus1); // 2^40 - 2^0
\r
536 square(t1, t0); // 2^41 - 2^1
\r
537 square(t0, t1); // 2^42 - 2^2
\r
538 for (int i = 2; i < 10; i += 2) { // 2^50 - 2^10
\r
542 mult(z2To50Minus1, t0, z2To10Minus1); // 2^50 - 2^0
\r
544 square(t0, z2To50Minus1); // 2^51 - 2^1
\r
545 square(t1, t0); // 2^52 - 2^2
\r
546 for (int i = 2; i < 50; i += 2) { // 2^100 - 2^50
\r
550 mult(z2To100Minus1, t1, z2To50Minus1); // 2^100 - 2^0
\r
552 square(t1, z2To100Minus1); // 2^101 - 2^1
\r
553 square(t0, t1); // 2^102 - 2^2
\r
554 for (int i = 2; i < 100; i += 2) { // 2^200 - 2^100
\r
558 mult(t1, t0, z2To100Minus1); // 2^200 - 2^0
\r
560 square(t0, t1); // 2^201 - 2^1
\r
561 square(t1, t0); // 2^202 - 2^2
\r
562 for (int i = 2; i < 50; i += 2) { // 2^250 - 2^50
\r
566 mult(t0, t1, z2To50Minus1); // 2^250 - 2^0
\r
568 square(t1, t0); // 2^251 - 2^1
\r
569 square(t0, t1); // 2^252 - 2^2
\r
570 square(t1, t0); // 2^253 - 2^3
\r
571 square(t0, t1); // 2^254 - 2^4
\r
572 square(t1, t0); // 2^255 - 2^5
\r
573 mult(out, t1, z11); // 2^255 - 21
\r
578 * Returns 0xffffffff iff a == b and zero otherwise.
\r
580 private static int eq(int a, int b) {
\r
591 * returns 0xffffffff if a >= b and zero otherwise, where a and b are both non-negative.
\r
593 private static int gte(int a, int b) {
\r
595 // a >= 0 iff a >= b.
\r