vendor/github.com/btcsuite/btcd/btcec/field.go

   1 // Copyright (c) 2013-2016 The btcsuite developers
   2 // Copyright (c) 2013-2016 Dave Collins
   3 // Use of this source code is governed by an ISC
   4 // license that can be found in the LICENSE file.
   5
   6 package btcec
   7
   8 // References:
   9 //   [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
  10 //     http://cacr.uwaterloo.ca/hac/
  11
  12 // All elliptic curve operations for secp256k1 are done in a finite field
  13 // characterized by a 256-bit prime.  Given this precision is larger than the
  14 // biggest available native type, obviously some form of bignum math is needed.
  15 // This package implements specialized fixed-precision field arithmetic rather
  16 // than relying on an arbitrary-precision arithmetic package such as math/big
  17 // for dealing with the field math since the size is known.  As a result, rather
  18 // large performance gains are achieved by taking advantage of many
  19 // optimizations not available to arbitrary-precision arithmetic and generic
  20 // modular arithmetic algorithms.
  21 //
  22 // There are various ways to internally represent each finite field element.
  23 // For example, the most obvious representation would be to use an array of 4
  24 // uint64s (64 bits * 4 = 256 bits).  However, that representation suffers from
  25 // a couple of issues.  First, there is no native Go type large enough to handle
  26 // the intermediate results while adding or multiplying two 64-bit numbers, and
  27 // second there is no space left for overflows when performing the intermediate
  28 // arithmetic between each array element which would lead to expensive carry
  29 // propagation.
  30 //
  31 // Given the above, this implementation represents the the field elements as
  32 // 10 uint32s with each word (array entry) treated as base 2^26.  This was
  33 // chosen for the following reasons:
  34 // 1) Most systems at the current time are 64-bit (or at least have 64-bit
  35 //    registers available for specialized purposes such as MMX) so the
  36 //    intermediate results can typically be done using a native register (and
  37 //    using uint64s to avoid the need for additional half-word arithmetic)
  38 // 2) In order to allow addition of the internal words without having to
  39 //    propagate the the carry, the max normalized value for each register must
  40 //    be less than the number of bits available in the register
  41 // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
  42 //    reasonable choice for #2
  43 // 4) Given the need for 256-bits of precision and the properties stated in #1,
  44 //    #2, and #3, the representation which best accommodates this is 10 uint32s
  45 //    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
  46 //    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
  47 //    overflow
  48 //
  49 // Since it is so important that the field arithmetic is extremely fast for
  50 // high performance crypto, this package does not perform any validation where
  51 // it ordinarily would.  For example, some functions only give the correct
  52 // result is the field is normalized and there is no checking to ensure it is.
  53 // While I typically prefer to ensure all state and input is valid for most
  54 // packages, this code is really only used internally and every extra check
  55 // counts.
  56
  57 import (
  58         "encoding/hex"
  59 )
  60
  61 // Constants used to make the code more readable.
  62 const (
  63         twoBitsMask   = 0x3
  64         fourBitsMask  = 0xf
  65         sixBitsMask   = 0x3f
  66         eightBitsMask = 0xff
  67 )
  68
  69 // Constants related to the field representation.
  70 const (
  71         // fieldWords is the number of words used to internally represent the
  72         // 256-bit value.
  73         fieldWords = 10
  74
  75         // fieldBase is the exponent used to form the numeric base of each word.
  76         // 2^(fieldBase*i) where i is the word position.
  77         fieldBase = 26
  78
  79         // fieldOverflowBits is the minimum number of "overflow" bits for each
  80         // word in the field value.
  81         fieldOverflowBits = 32 - fieldBase
  82
  83         // fieldBaseMask is the mask for the bits in each word needed to
  84         // represent the numeric base of each word (except the most significant
  85         // word).
  86         fieldBaseMask = (1 << fieldBase) - 1
  87
  88         // fieldMSBBits is the number of bits in the most significant word used
  89         // to represent the value.
  90         fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))
  91
  92         // fieldMSBMask is the mask for the bits in the most significant word
  93         // needed to represent the value.
  94         fieldMSBMask = (1 << fieldMSBBits) - 1
  95
  96         // fieldPrimeWordZero is word zero of the secp256k1 prime in the
  97         // internal field representation.  It is used during negation.
  98         fieldPrimeWordZero = 0x3fffc2f
  99
 100         // fieldPrimeWordOne is word one of the secp256k1 prime in the
 101         // internal field representation.  It is used during negation.
 102         fieldPrimeWordOne = 0x3ffffbf
 103 )
 104
 105 // fieldVal implements optimized fixed-precision arithmetic over the
 106 // secp256k1 finite field.  This means all arithmetic is performed modulo
 107 // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.  It
 108 // represents each 256-bit value as 10 32-bit integers in base 2^26.  This
 109 // provides 6 bits of overflow in each word (10 bits in the most significant
 110 // word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It only implements
 111 // the arithmetic needed for elliptic curve operations.
 112 //
 113 // The following depicts the internal representation:
 114 //       -----------------------------------------------------------------
 115 //      |        n[9]       |        n[8]       | ... |        n[0]       |
 116 //      | 32 bits available | 32 bits available | ... | 32 bits available |
 117 //      | 22 bits for value | 26 bits for value | ... | 26 bits for value |
 118 //      | 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
 119 //      | Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
 120 //       -----------------------------------------------------------------
 121 //
 122 // For example, consider the number 2^49 + 1.  It would be represented as:
 123 //      n[0] = 1
 124 //      n[1] = 2^23
 125 //      n[2..9] = 0
 126 //
 127 // The full 256-bit value is then calculated by looping i from 9..0 and
 128 // doing sum(n[i] * 2^(26i)) like so:
 129 //      n[9] * 2^(26*9) = 0    * 2^234 = 0
 130 //      n[8] * 2^(26*8) = 0    * 2^208 = 0
 131 //      ...
 132 //      n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
 133 //      n[0] * 2^(26*0) = 1    * 2^0   = 1
 134 //      Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
 135 type fieldVal struct {
 136         n [10]uint32
 137 }
 138
 139 // String returns the field value as a human-readable hex string.
 140 func (f fieldVal) String() string {
 141         t := new(fieldVal).Set(&f).Normalize()
 142         return hex.EncodeToString(t.Bytes()[:])
 143 }
 144
 145 // Zero sets the field value to zero.  A newly created field value is already
 146 // set to zero.  This function can be useful to clear an existing field value
 147 // for reuse.
 148 func (f *fieldVal) Zero() {
 149         f.n[0] = 0
 150         f.n[1] = 0
 151         f.n[2] = 0
 152         f.n[3] = 0
 153         f.n[4] = 0
 154         f.n[5] = 0
 155         f.n[6] = 0
 156         f.n[7] = 0
 157         f.n[8] = 0
 158         f.n[9] = 0
 159 }
 160
 161 // Set sets the field value equal to the passed value.
 162 //
 163 // The field value is returned to support chaining.  This enables syntax like:
 164 // f := new(fieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
 165 // modified.
 166 func (f *fieldVal) Set(val *fieldVal) *fieldVal {
 167         *f = *val
 168         return f
 169 }
 170
 171 // SetInt sets the field value to the passed integer.  This is a convenience
 172 // function since it is fairly common to perform some arithemetic with small
 173 // native integers.
 174 //
 175 // The field value is returned to support chaining.  This enables syntax such
 176 // as f := new(fieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
 177 func (f *fieldVal) SetInt(ui uint) *fieldVal {
 178         f.Zero()
 179         f.n[0] = uint32(ui)
 180         return f
 181 }
 182
 183 // SetBytes packs the passed 32-byte big-endian value into the internal field
 184 // value representation.
 185 //
 186 // The field value is returned to support chaining.  This enables syntax like:
 187 // f := new(fieldVal).SetBytes(byteArray).Mul(f2) so that f = ba * f2.
 188 func (f *fieldVal) SetBytes(b *[32]byte) *fieldVal {
 189         // Pack the 256 total bits across the 10 uint32 words with a max of
 190         // 26-bits per word.  This could be done with a couple of for loops,
 191         // but this unrolled version is significantly faster.  Benchmarks show
 192         // this is about 34 times faster than the variant which uses loops.
 193         f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
 194                 (uint32(b[28])&twoBitsMask)<<24
 195         f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
 196                 (uint32(b[25])&fourBitsMask)<<22
 197         f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
 198                 (uint32(b[22])&sixBitsMask)<<20
 199         f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
 200                 uint32(b[19])<<18
 201         f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
 202                 (uint32(b[15])&twoBitsMask)<<24
 203         f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
 204                 (uint32(b[12])&fourBitsMask)<<22
 205         f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
 206                 (uint32(b[9])&sixBitsMask)<<20
 207         f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
 208                 uint32(b[6])<<18
 209         f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
 210                 (uint32(b[2])&twoBitsMask)<<24
 211         f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
 212         return f
 213 }
 214
 215 // SetByteSlice packs the passed big-endian value into the internal field value
 216 // representation.  Only the first 32-bytes are used.  As a result, it is up to
 217 // the caller to ensure numbers of the appropriate size are used or the value
 218 // will be truncated.
 219 //
 220 // The field value is returned to support chaining.  This enables syntax like:
 221 // f := new(fieldVal).SetByteSlice(byteSlice)
 222 func (f *fieldVal) SetByteSlice(b []byte) *fieldVal {
 223         var b32 [32]byte
 224         for i := 0; i < len(b); i++ {
 225                 if i < 32 {
 226                         b32[i+(32-len(b))] = b[i]
 227                 }
 228         }
 229         return f.SetBytes(&b32)
 230 }
 231
 232 // SetHex decodes the passed big-endian hex string into the internal field value
 233 // representation.  Only the first 32-bytes are used.
 234 //
 235 // The field value is returned to support chaining.  This enables syntax like:
 236 // f := new(fieldVal).SetHex("0abc").Add(1) so that f = 0x0abc + 1
 237 func (f *fieldVal) SetHex(hexString string) *fieldVal {
 238         if len(hexString)%2 != 0 {
 239                 hexString = "0" + hexString
 240         }
 241         bytes, _ := hex.DecodeString(hexString)
 242         return f.SetByteSlice(bytes)
 243 }
 244
 245 // Normalize normalizes the internal field words into the desired range and
 246 // performs fast modular reduction over the secp256k1 prime by making use of the
 247 // special form of the prime.
 248 func (f *fieldVal) Normalize() *fieldVal {
 249         // The field representation leaves 6 bits of overflow in each word so
 250         // intermediate calculations can be performed without needing to
 251         // propagate the carry to each higher word during the calculations.  In
 252         // order to normalize, we need to "compact" the full 256-bit value to
 253         // the right while propagating any carries through to the high order
 254         // word.
 255         //
 256         // Since this field is doing arithmetic modulo the secp256k1 prime, we
 257         // also need to perform modular reduction over the prime.
 258         //
 259         // Per [HAC] section 14.3.4: Reduction method of moduli of special form,
 260         // when the modulus is of the special form m = b^t - c, highly efficient
 261         // reduction can be achieved.
 262         //
 263         // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
 264         // this criteria.
 265         //
 266         // 4294968273 in field representation (base 2^26) is:
 267         // n[0] = 977
 268         // n[1] = 64
 269         // That is to say (2^26 * 64) + 977 = 4294968273
 270         //
 271         // The algorithm presented in the referenced section typically repeats
 272         // until the quotient is zero.  However, due to our field representation
 273         // we already know to within one reduction how many times we would need
 274         // to repeat as it's the uppermost bits of the high order word.  Thus we
 275         // can simply multiply the magnitude by the field representation of the
 276         // prime and do a single iteration.  After this step there might be an
 277         // additional carry to bit 256 (bit 22 of the high order word).
 278         t9 := f.n[9]
 279         m := t9 >> fieldMSBBits
 280         t9 = t9 & fieldMSBMask
 281         t0 := f.n[0] + m*977
 282         t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
 283         t0 = t0 & fieldBaseMask
 284         t2 := (t1 >> fieldBase) + f.n[2]
 285         t1 = t1 & fieldBaseMask
 286         t3 := (t2 >> fieldBase) + f.n[3]
 287         t2 = t2 & fieldBaseMask
 288         t4 := (t3 >> fieldBase) + f.n[4]
 289         t3 = t3 & fieldBaseMask
 290         t5 := (t4 >> fieldBase) + f.n[5]
 291         t4 = t4 & fieldBaseMask
 292         t6 := (t5 >> fieldBase) + f.n[6]
 293         t5 = t5 & fieldBaseMask
 294         t7 := (t6 >> fieldBase) + f.n[7]
 295         t6 = t6 & fieldBaseMask
 296         t8 := (t7 >> fieldBase) + f.n[8]
 297         t7 = t7 & fieldBaseMask
 298         t9 = (t8 >> fieldBase) + t9
 299         t8 = t8 & fieldBaseMask
 300
 301         // At this point, the magnitude is guaranteed to be one, however, the
 302         // value could still be greater than the prime if there was either a
 303         // carry through to bit 256 (bit 22 of the higher order word) or the
 304         // value is greater than or equal to the field characteristic.  The
 305         // following determines if either or these conditions are true and does
 306         // the final reduction in constant time.
 307         //
 308         // Note that the if/else statements here intentionally do the bitwise
 309         // operators even when it won't change the value to ensure constant time
 310         // between the branches.  Also note that 'm' will be zero when neither
 311         // of the aforementioned conditions are true and the value will not be
 312         // changed when 'm' is zero.
 313         m = 1
 314         if t9 == fieldMSBMask {
 315                 m &= 1
 316         } else {
 317                 m &= 0
 318         }
 319         if t2&t3&t4&t5&t6&t7&t8 == fieldBaseMask {
 320                 m &= 1
 321         } else {
 322                 m &= 0
 323         }
 324         if ((t0+977)>>fieldBase + t1 + 64) > fieldBaseMask {
 325                 m &= 1
 326         } else {
 327                 m &= 0
 328         }
 329         if t9>>fieldMSBBits != 0 {
 330                 m |= 1
 331         } else {
 332                 m |= 0
 333         }
 334         t0 = t0 + m*977
 335         t1 = (t0 >> fieldBase) + t1 + (m << 6)
 336         t0 = t0 & fieldBaseMask
 337         t2 = (t1 >> fieldBase) + t2
 338         t1 = t1 & fieldBaseMask
 339         t3 = (t2 >> fieldBase) + t3
 340         t2 = t2 & fieldBaseMask
 341         t4 = (t3 >> fieldBase) + t4
 342         t3 = t3 & fieldBaseMask
 343         t5 = (t4 >> fieldBase) + t5
 344         t4 = t4 & fieldBaseMask
 345         t6 = (t5 >> fieldBase) + t6
 346         t5 = t5 & fieldBaseMask
 347         t7 = (t6 >> fieldBase) + t7
 348         t6 = t6 & fieldBaseMask
 349         t8 = (t7 >> fieldBase) + t8
 350         t7 = t7 & fieldBaseMask
 351         t9 = (t8 >> fieldBase) + t9
 352         t8 = t8 & fieldBaseMask
 353         t9 = t9 & fieldMSBMask // Remove potential multiple of 2^256.
 354
 355         // Finally, set the normalized and reduced words.
 356         f.n[0] = t0
 357         f.n[1] = t1
 358         f.n[2] = t2
 359         f.n[3] = t3
 360         f.n[4] = t4
 361         f.n[5] = t5
 362         f.n[6] = t6
 363         f.n[7] = t7
 364         f.n[8] = t8
 365         f.n[9] = t9
 366         return f
 367 }
 368
 369 // PutBytes unpacks the field value to a 32-byte big-endian value using the
 370 // passed byte array.  There is a similar function, Bytes, which unpacks the
 371 // field value into a new array and returns that.  This version is provided
 372 // since it can be useful to cut down on the number of allocations by allowing
 373 // the caller to reuse a buffer.
 374 //
 375 // The field value must be normalized for this function to return the correct
 376 // result.
 377 func (f *fieldVal) PutBytes(b *[32]byte) {
 378         // Unpack the 256 total bits from the 10 uint32 words with a max of
 379         // 26-bits per word.  This could be done with a couple of for loops,
 380         // but this unrolled version is a bit faster.  Benchmarks show this is
 381         // about 10 times faster than the variant which uses loops.
 382         b[31] = byte(f.n[0] & eightBitsMask)
 383         b[30] = byte((f.n[0] >> 8) & eightBitsMask)
 384         b[29] = byte((f.n[0] >> 16) & eightBitsMask)
 385         b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
 386         b[27] = byte((f.n[1] >> 6) & eightBitsMask)
 387         b[26] = byte((f.n[1] >> 14) & eightBitsMask)
 388         b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
 389         b[24] = byte((f.n[2] >> 4) & eightBitsMask)
 390         b[23] = byte((f.n[2] >> 12) & eightBitsMask)
 391         b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
 392         b[21] = byte((f.n[3] >> 2) & eightBitsMask)
 393         b[20] = byte((f.n[3] >> 10) & eightBitsMask)
 394         b[19] = byte((f.n[3] >> 18) & eightBitsMask)
 395         b[18] = byte(f.n[4] & eightBitsMask)
 396         b[17] = byte((f.n[4] >> 8) & eightBitsMask)
 397         b[16] = byte((f.n[4] >> 16) & eightBitsMask)
 398         b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
 399         b[14] = byte((f.n[5] >> 6) & eightBitsMask)
 400         b[13] = byte((f.n[5] >> 14) & eightBitsMask)
 401         b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
 402         b[11] = byte((f.n[6] >> 4) & eightBitsMask)
 403         b[10] = byte((f.n[6] >> 12) & eightBitsMask)
 404         b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
 405         b[8] = byte((f.n[7] >> 2) & eightBitsMask)
 406         b[7] = byte((f.n[7] >> 10) & eightBitsMask)
 407         b[6] = byte((f.n[7] >> 18) & eightBitsMask)
 408         b[5] = byte(f.n[8] & eightBitsMask)
 409         b[4] = byte((f.n[8] >> 8) & eightBitsMask)
 410         b[3] = byte((f.n[8] >> 16) & eightBitsMask)
 411         b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
 412         b[1] = byte((f.n[9] >> 6) & eightBitsMask)
 413         b[0] = byte((f.n[9] >> 14) & eightBitsMask)
 414 }
 415
 416 // Bytes unpacks the field value to a 32-byte big-endian value.  See PutBytes
 417 // for a variant that allows the a buffer to be passed which can be useful to
 418 // to cut down on the number of allocations by allowing the caller to reuse a
 419 // buffer.
 420 //
 421 // The field value must be normalized for this function to return correct
 422 // result.
 423 func (f *fieldVal) Bytes() *[32]byte {
 424         b := new([32]byte)
 425         f.PutBytes(b)
 426         return b
 427 }
 428
 429 // IsZero returns whether or not the field value is equal to zero.
 430 func (f *fieldVal) IsZero() bool {
 431         // The value can only be zero if no bits are set in any of the words.
 432         // This is a constant time implementation.
 433         bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
 434                 f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
 435
 436         return bits == 0
 437 }
 438
 439 // IsOdd returns whether or not the field value is an odd number.
 440 //
 441 // The field value must be normalized for this function to return correct
 442 // result.
 443 func (f *fieldVal) IsOdd() bool {
 444         // Only odd numbers have the bottom bit set.
 445         return f.n[0]&1 == 1
 446 }
 447
 448 // Equals returns whether or not the two field values are the same.  Both
 449 // field values being compared must be normalized for this function to return
 450 // the correct result.
 451 func (f *fieldVal) Equals(val *fieldVal) bool {
 452         // Xor only sets bits when they are different, so the two field values
 453         // can only be the same if no bits are set after xoring each word.
 454         // This is a constant time implementation.
 455         bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
 456                 (f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
 457                 (f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
 458                 (f.n[9] ^ val.n[9])
 459
 460         return bits == 0
 461 }
 462
 463 // NegateVal negates the passed value and stores the result in f.  The caller
 464 // must provide the magnitude of the passed value for a correct result.
 465 //
 466 // The field value is returned to support chaining.  This enables syntax like:
 467 // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
 468 func (f *fieldVal) NegateVal(val *fieldVal, magnitude uint32) *fieldVal {
 469         // Negation in the field is just the prime minus the value.  However,
 470         // in order to allow negation against a field value without having to
 471         // normalize/reduce it first, multiply by the magnitude (that is how
 472         // "far" away it is from the normalized value) to adjust.  Also, since
 473         // negating a value pushes it one more order of magnitude away from the
 474         // normalized range, add 1 to compensate.
 475         //
 476         // For some intuition here, imagine you're performing mod 12 arithmetic
 477         // (picture a clock) and you are negating the number 7.  So you start at
 478         // 12 (which is of course 0 under mod 12) and count backwards (left on
 479         // the clock) 7 times to arrive at 5.  Notice this is just 12-7 = 5.
 480         // Now, assume you're starting with 19, which is a number that is
 481         // already larger than the modulus and congruent to 7 (mod 12).  When a
 482         // value is already in the desired range, its magnitude is 1.  Since 19
 483         // is an additional "step", its magnitude (mod 12) is 2.  Since any
 484         // multiple of the modulus is conguent to zero (mod m), the answer can
 485         // be shortcut by simply mulplying the magnitude by the modulus and
 486         // subtracting.  Keeping with the example, this would be (2*12)-19 = 5.
 487         f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
 488         f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
 489         f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
 490         f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
 491         f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
 492         f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
 493         f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
 494         f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
 495         f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
 496         f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]
 497
 498         return f
 499 }
 500
 501 // Negate negates the field value.  The existing field value is modified.  The
 502 // caller must provide the magnitude of the field value for a correct result.
 503 //
 504 // The field value is returned to support chaining.  This enables syntax like:
 505 // f.Negate().AddInt(1) so that f = -f + 1.
 506 func (f *fieldVal) Negate(magnitude uint32) *fieldVal {
 507         return f.NegateVal(f, magnitude)
 508 }
 509
 510 // AddInt adds the passed integer to the existing field value and stores the
 511 // result in f.  This is a convenience function since it is fairly common to
 512 // perform some arithemetic with small native integers.
 513 //
 514 // The field value is returned to support chaining.  This enables syntax like:
 515 // f.AddInt(1).Add(f2) so that f = f + 1 + f2.
 516 func (f *fieldVal) AddInt(ui uint) *fieldVal {
 517         // Since the field representation intentionally provides overflow bits,
 518         // it's ok to use carryless addition as the carry bit is safely part of
 519         // the word and will be normalized out.
 520         f.n[0] += uint32(ui)
 521
 522         return f
 523 }
 524
 525 // Add adds the passed value to the existing field value and stores the result
 526 // in f.
 527 //
 528 // The field value is returned to support chaining.  This enables syntax like:
 529 // f.Add(f2).AddInt(1) so that f = f + f2 + 1.
 530 func (f *fieldVal) Add(val *fieldVal) *fieldVal {
 531         // Since the field representation intentionally provides overflow bits,
 532         // it's ok to use carryless addition as the carry bit is safely part of
 533         // each word and will be normalized out.  This could obviously be done
 534         // in a loop, but the unrolled version is faster.
 535         f.n[0] += val.n[0]
 536         f.n[1] += val.n[1]
 537         f.n[2] += val.n[2]
 538         f.n[3] += val.n[3]
 539         f.n[4] += val.n[4]
 540         f.n[5] += val.n[5]
 541         f.n[6] += val.n[6]
 542         f.n[7] += val.n[7]
 543         f.n[8] += val.n[8]
 544         f.n[9] += val.n[9]
 545
 546         return f
 547 }
 548
 549 // Add2 adds the passed two field values together and stores the result in f.
 550 //
 551 // The field value is returned to support chaining.  This enables syntax like:
 552 // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1.
 553 func (f *fieldVal) Add2(val *fieldVal, val2 *fieldVal) *fieldVal {
 554         // Since the field representation intentionally provides overflow bits,
 555         // it's ok to use carryless addition as the carry bit is safely part of
 556         // each word and will be normalized out.  This could obviously be done
 557         // in a loop, but the unrolled version is faster.
 558         f.n[0] = val.n[0] + val2.n[0]
 559         f.n[1] = val.n[1] + val2.n[1]
 560         f.n[2] = val.n[2] + val2.n[2]
 561         f.n[3] = val.n[3] + val2.n[3]
 562         f.n[4] = val.n[4] + val2.n[4]
 563         f.n[5] = val.n[5] + val2.n[5]
 564         f.n[6] = val.n[6] + val2.n[6]
 565         f.n[7] = val.n[7] + val2.n[7]
 566         f.n[8] = val.n[8] + val2.n[8]
 567         f.n[9] = val.n[9] + val2.n[9]
 568
 569         return f
 570 }
 571
 572 // MulInt multiplies the field value by the passed int and stores the result in
 573 // f.  Note that this function can overflow if multiplying the value by any of
 574 // the individual words exceeds a max uint32.  Therefore it is important that
 575 // the caller ensures no overflows will occur before using this function.
 576 //
 577 // The field value is returned to support chaining.  This enables syntax like:
 578 // f.MulInt(2).Add(f2) so that f = 2 * f + f2.
 579 func (f *fieldVal) MulInt(val uint) *fieldVal {
 580         // Since each word of the field representation can hold up to
 581         // fieldOverflowBits extra bits which will be normalized out, it's safe
 582         // to multiply each word without using a larger type or carry
 583         // propagation so long as the values won't overflow a uint32.  This
 584         // could obviously be done in a loop, but the unrolled version is
 585         // faster.
 586         ui := uint32(val)
 587         f.n[0] *= ui
 588         f.n[1] *= ui
 589         f.n[2] *= ui
 590         f.n[3] *= ui
 591         f.n[4] *= ui
 592         f.n[5] *= ui
 593         f.n[6] *= ui
 594         f.n[7] *= ui
 595         f.n[8] *= ui
 596         f.n[9] *= ui
 597
 598         return f
 599 }
 600
 601 // Mul multiplies the passed value to the existing field value and stores the
 602 // result in f.  Note that this function can overflow if multiplying any
 603 // of the individual words exceeds a max uint32.  In practice, this means the
 604 // magnitude of either value involved in the multiplication must be a max of
 605 // 8.
 606 //
 607 // The field value is returned to support chaining.  This enables syntax like:
 608 // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1.
 609 func (f *fieldVal) Mul(val *fieldVal) *fieldVal {
 610         return f.Mul2(f, val)
 611 }
 612
 613 // Mul2 multiplies the passed two field values together and stores the result
 614 // result in f.  Note that this function can overflow if multiplying any of
 615 // the individual words exceeds a max uint32.  In practice, this means the
 616 // magnitude of either value involved in the multiplication must be a max of
 617 // 8.
 618 //
 619 // The field value is returned to support chaining.  This enables syntax like:
 620 // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1.
 621 func (f *fieldVal) Mul2(val *fieldVal, val2 *fieldVal) *fieldVal {
 622         // This could be done with a couple of for loops and an array to store
 623         // the intermediate terms, but this unrolled version is significantly
 624         // faster.
 625
 626         // Terms for 2^(fieldBase*0).
 627         m := uint64(val.n[0]) * uint64(val2.n[0])
 628         t0 := m & fieldBaseMask
 629
 630         // Terms for 2^(fieldBase*1).
 631         m = (m >> fieldBase) +
 632                 uint64(val.n[0])*uint64(val2.n[1]) +
 633                 uint64(val.n[1])*uint64(val2.n[0])
 634         t1 := m & fieldBaseMask
 635
 636         // Terms for 2^(fieldBase*2).
 637         m = (m >> fieldBase) +
 638                 uint64(val.n[0])*uint64(val2.n[2]) +
 639                 uint64(val.n[1])*uint64(val2.n[1]) +
 640                 uint64(val.n[2])*uint64(val2.n[0])
 641         t2 := m & fieldBaseMask
 642
 643         // Terms for 2^(fieldBase*3).
 644         m = (m >> fieldBase) +
 645                 uint64(val.n[0])*uint64(val2.n[3]) +
 646                 uint64(val.n[1])*uint64(val2.n[2]) +
 647                 uint64(val.n[2])*uint64(val2.n[1]) +
 648                 uint64(val.n[3])*uint64(val2.n[0])
 649         t3 := m & fieldBaseMask
 650
 651         // Terms for 2^(fieldBase*4).
 652         m = (m >> fieldBase) +
 653                 uint64(val.n[0])*uint64(val2.n[4]) +
 654                 uint64(val.n[1])*uint64(val2.n[3]) +
 655                 uint64(val.n[2])*uint64(val2.n[2]) +
 656                 uint64(val.n[3])*uint64(val2.n[1]) +
 657                 uint64(val.n[4])*uint64(val2.n[0])
 658         t4 := m & fieldBaseMask
 659
 660         // Terms for 2^(fieldBase*5).
 661         m = (m >> fieldBase) +
 662                 uint64(val.n[0])*uint64(val2.n[5]) +
 663                 uint64(val.n[1])*uint64(val2.n[4]) +
 664                 uint64(val.n[2])*uint64(val2.n[3]) +
 665                 uint64(val.n[3])*uint64(val2.n[2]) +
 666                 uint64(val.n[4])*uint64(val2.n[1]) +
 667                 uint64(val.n[5])*uint64(val2.n[0])
 668         t5 := m & fieldBaseMask
 669
 670         // Terms for 2^(fieldBase*6).
 671         m = (m >> fieldBase) +
 672                 uint64(val.n[0])*uint64(val2.n[6]) +
 673                 uint64(val.n[1])*uint64(val2.n[5]) +
 674                 uint64(val.n[2])*uint64(val2.n[4]) +
 675                 uint64(val.n[3])*uint64(val2.n[3]) +
 676                 uint64(val.n[4])*uint64(val2.n[2]) +
 677                 uint64(val.n[5])*uint64(val2.n[1]) +
 678                 uint64(val.n[6])*uint64(val2.n[0])
 679         t6 := m & fieldBaseMask
 680
 681         // Terms for 2^(fieldBase*7).
 682         m = (m >> fieldBase) +
 683                 uint64(val.n[0])*uint64(val2.n[7]) +
 684                 uint64(val.n[1])*uint64(val2.n[6]) +
 685                 uint64(val.n[2])*uint64(val2.n[5]) +
 686                 uint64(val.n[3])*uint64(val2.n[4]) +
 687                 uint64(val.n[4])*uint64(val2.n[3]) +
 688                 uint64(val.n[5])*uint64(val2.n[2]) +
 689                 uint64(val.n[6])*uint64(val2.n[1]) +
 690                 uint64(val.n[7])*uint64(val2.n[0])
 691         t7 := m & fieldBaseMask
 692
 693         // Terms for 2^(fieldBase*8).
 694         m = (m >> fieldBase) +
 695                 uint64(val.n[0])*uint64(val2.n[8]) +
 696                 uint64(val.n[1])*uint64(val2.n[7]) +
 697                 uint64(val.n[2])*uint64(val2.n[6]) +
 698                 uint64(val.n[3])*uint64(val2.n[5]) +
 699                 uint64(val.n[4])*uint64(val2.n[4]) +
 700                 uint64(val.n[5])*uint64(val2.n[3]) +
 701                 uint64(val.n[6])*uint64(val2.n[2]) +
 702                 uint64(val.n[7])*uint64(val2.n[1]) +
 703                 uint64(val.n[8])*uint64(val2.n[0])
 704         t8 := m & fieldBaseMask
 705
 706         // Terms for 2^(fieldBase*9).
 707         m = (m >> fieldBase) +
 708                 uint64(val.n[0])*uint64(val2.n[9]) +
 709                 uint64(val.n[1])*uint64(val2.n[8]) +
 710                 uint64(val.n[2])*uint64(val2.n[7]) +
 711                 uint64(val.n[3])*uint64(val2.n[6]) +
 712                 uint64(val.n[4])*uint64(val2.n[5]) +
 713                 uint64(val.n[5])*uint64(val2.n[4]) +
 714                 uint64(val.n[6])*uint64(val2.n[3]) +
 715                 uint64(val.n[7])*uint64(val2.n[2]) +
 716                 uint64(val.n[8])*uint64(val2.n[1]) +
 717                 uint64(val.n[9])*uint64(val2.n[0])
 718         t9 := m & fieldBaseMask
 719
 720         // Terms for 2^(fieldBase*10).
 721         m = (m >> fieldBase) +
 722                 uint64(val.n[1])*uint64(val2.n[9]) +
 723                 uint64(val.n[2])*uint64(val2.n[8]) +
 724                 uint64(val.n[3])*uint64(val2.n[7]) +
 725                 uint64(val.n[4])*uint64(val2.n[6]) +
 726                 uint64(val.n[5])*uint64(val2.n[5]) +
 727                 uint64(val.n[6])*uint64(val2.n[4]) +
 728                 uint64(val.n[7])*uint64(val2.n[3]) +
 729                 uint64(val.n[8])*uint64(val2.n[2]) +
 730                 uint64(val.n[9])*uint64(val2.n[1])
 731         t10 := m & fieldBaseMask
 732
 733         // Terms for 2^(fieldBase*11).
 734         m = (m >> fieldBase) +
 735                 uint64(val.n[2])*uint64(val2.n[9]) +
 736                 uint64(val.n[3])*uint64(val2.n[8]) +
 737                 uint64(val.n[4])*uint64(val2.n[7]) +
 738                 uint64(val.n[5])*uint64(val2.n[6]) +
 739                 uint64(val.n[6])*uint64(val2.n[5]) +
 740                 uint64(val.n[7])*uint64(val2.n[4]) +
 741                 uint64(val.n[8])*uint64(val2.n[3]) +
 742                 uint64(val.n[9])*uint64(val2.n[2])
 743         t11 := m & fieldBaseMask
 744
 745         // Terms for 2^(fieldBase*12).
 746         m = (m >> fieldBase) +
 747                 uint64(val.n[3])*uint64(val2.n[9]) +
 748                 uint64(val.n[4])*uint64(val2.n[8]) +
 749                 uint64(val.n[5])*uint64(val2.n[7]) +
 750                 uint64(val.n[6])*uint64(val2.n[6]) +
 751                 uint64(val.n[7])*uint64(val2.n[5]) +
 752                 uint64(val.n[8])*uint64(val2.n[4]) +
 753                 uint64(val.n[9])*uint64(val2.n[3])
 754         t12 := m & fieldBaseMask
 755
 756         // Terms for 2^(fieldBase*13).
 757         m = (m >> fieldBase) +
 758                 uint64(val.n[4])*uint64(val2.n[9]) +
 759                 uint64(val.n[5])*uint64(val2.n[8]) +
 760                 uint64(val.n[6])*uint64(val2.n[7]) +
 761                 uint64(val.n[7])*uint64(val2.n[6]) +
 762                 uint64(val.n[8])*uint64(val2.n[5]) +
 763                 uint64(val.n[9])*uint64(val2.n[4])
 764         t13 := m & fieldBaseMask
 765
 766         // Terms for 2^(fieldBase*14).
 767         m = (m >> fieldBase) +
 768                 uint64(val.n[5])*uint64(val2.n[9]) +
 769                 uint64(val.n[6])*uint64(val2.n[8]) +
 770                 uint64(val.n[7])*uint64(val2.n[7]) +
 771                 uint64(val.n[8])*uint64(val2.n[6]) +
 772                 uint64(val.n[9])*uint64(val2.n[5])
 773         t14 := m & fieldBaseMask
 774
 775         // Terms for 2^(fieldBase*15).
 776         m = (m >> fieldBase) +
 777                 uint64(val.n[6])*uint64(val2.n[9]) +
 778                 uint64(val.n[7])*uint64(val2.n[8]) +
 779                 uint64(val.n[8])*uint64(val2.n[7]) +
 780                 uint64(val.n[9])*uint64(val2.n[6])
 781         t15 := m & fieldBaseMask
 782
 783         // Terms for 2^(fieldBase*16).
 784         m = (m >> fieldBase) +
 785                 uint64(val.n[7])*uint64(val2.n[9]) +
 786                 uint64(val.n[8])*uint64(val2.n[8]) +
 787                 uint64(val.n[9])*uint64(val2.n[7])
 788         t16 := m & fieldBaseMask
 789
 790         // Terms for 2^(fieldBase*17).
 791         m = (m >> fieldBase) +
 792                 uint64(val.n[8])*uint64(val2.n[9]) +
 793                 uint64(val.n[9])*uint64(val2.n[8])
 794         t17 := m & fieldBaseMask
 795
 796         // Terms for 2^(fieldBase*18).
 797         m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9])
 798         t18 := m & fieldBaseMask
 799
 800         // What's left is for 2^(fieldBase*19).
 801         t19 := m >> fieldBase
 802
 803         // At this point, all of the terms are grouped into their respective
 804         // base.
 805         //
 806         // Per [HAC] section 14.3.4: Reduction method of moduli of special form,
 807         // when the modulus is of the special form m = b^t - c, highly efficient
 808         // reduction can be achieved per the provided algorithm.
 809         //
 810         // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
 811         // this criteria.
 812         //
 813         // 4294968273 in field representation (base 2^26) is:
 814         // n[0] = 977
 815         // n[1] = 64
 816         // That is to say (2^26 * 64) + 977 = 4294968273
 817         //
 818         // Since each word is in base 26, the upper terms (t10 and up) start
 819         // at 260 bits (versus the final desired range of 256 bits), so the
 820         // field representation of 'c' from above needs to be adjusted for the
 821         // extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
 822         // 68719492368.  Thus, the adjusted field representation of 'c' is:
 823         // n[0] = 977 * 16 = 15632
 824         // n[1] = 64 * 16 = 1024
 825         // That is to say (2^26 * 1024) + 15632 = 68719492368
 826         //
 827         // To reduce the final term, t19, the entire 'c' value is needed instead
 828         // of only n[0] because there are no more terms left to handle n[1].
 829         // This means there might be some magnitude left in the upper bits that
 830         // is handled below.
 831         m = t0 + t10*15632
 832         t0 = m & fieldBaseMask
 833         m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
 834         t1 = m & fieldBaseMask
 835         m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
 836         t2 = m & fieldBaseMask
 837         m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
 838         t3 = m & fieldBaseMask
 839         m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
 840         t4 = m & fieldBaseMask
 841         m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
 842         t5 = m & fieldBaseMask
 843         m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
 844         t6 = m & fieldBaseMask
 845         m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
 846         t7 = m & fieldBaseMask
 847         m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
 848         t8 = m & fieldBaseMask
 849         m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
 850         t9 = m & fieldMSBMask
 851         m = m >> fieldMSBBits
 852
 853         // At this point, if the magnitude is greater than 0, the overall value
 854         // is greater than the max possible 256-bit value.  In particular, it is
 855         // "how many times larger" than the max value it is.
 856         //
 857         // The algorithm presented in [HAC] section 14.3.4 repeats until the
 858         // quotient is zero.  However, due to the above, we already know at
 859         // least how many times we would need to repeat as it's the value
 860         // currently in m.  Thus we can simply multiply the magnitude by the
 861         // field representation of the prime and do a single iteration.  Notice
 862         // that nothing will be changed when the magnitude is zero, so we could
 863         // skip this in that case, however always running regardless allows it
 864         // to run in constant time.  The final result will be in the range
 865         // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
 866         // magnitude of 1, but it is denormalized.
 867         d := t0 + m*977
 868         f.n[0] = uint32(d & fieldBaseMask)
 869         d = (d >> fieldBase) + t1 + m*64
 870         f.n[1] = uint32(d & fieldBaseMask)
 871         f.n[2] = uint32((d >> fieldBase) + t2)
 872         f.n[3] = uint32(t3)
 873         f.n[4] = uint32(t4)
 874         f.n[5] = uint32(t5)
 875         f.n[6] = uint32(t6)
 876         f.n[7] = uint32(t7)
 877         f.n[8] = uint32(t8)
 878         f.n[9] = uint32(t9)
 879
 880         return f
 881 }
 882
 883 // Square squares the field value.  The existing field value is modified.  Note
 884 // that this function can overflow if multiplying any of the individual words
 885 // exceeds a max uint32.  In practice, this means the magnitude of the field
 886 // must be a max of 8 to prevent overflow.
 887 //
 888 // The field value is returned to support chaining.  This enables syntax like:
 889 // f.Square().Mul(f2) so that f = f^2 * f2.
 890 func (f *fieldVal) Square() *fieldVal {
 891         return f.SquareVal(f)
 892 }
 893
 894 // SquareVal squares the passed value and stores the result in f.  Note that
 895 // this function can overflow if multiplying any of the individual words
 896 // exceeds a max uint32.  In practice, this means the magnitude of the field
 897 // being squred must be a max of 8 to prevent overflow.
 898 //
 899 // The field value is returned to support chaining.  This enables syntax like:
 900 // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3.
 901 func (f *fieldVal) SquareVal(val *fieldVal) *fieldVal {
 902         // This could be done with a couple of for loops and an array to store
 903         // the intermediate terms, but this unrolled version is significantly
 904         // faster.
 905
 906         // Terms for 2^(fieldBase*0).
 907         m := uint64(val.n[0]) * uint64(val.n[0])
 908         t0 := m & fieldBaseMask
 909
 910         // Terms for 2^(fieldBase*1).
 911         m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1])
 912         t1 := m & fieldBaseMask
 913
 914         // Terms for 2^(fieldBase*2).
 915         m = (m >> fieldBase) +
 916                 2*uint64(val.n[0])*uint64(val.n[2]) +
 917                 uint64(val.n[1])*uint64(val.n[1])
 918         t2 := m & fieldBaseMask
 919
 920         // Terms for 2^(fieldBase*3).
 921         m = (m >> fieldBase) +
 922                 2*uint64(val.n[0])*uint64(val.n[3]) +
 923                 2*uint64(val.n[1])*uint64(val.n[2])
 924         t3 := m & fieldBaseMask
 925
 926         // Terms for 2^(fieldBase*4).
 927         m = (m >> fieldBase) +
 928                 2*uint64(val.n[0])*uint64(val.n[4]) +
 929                 2*uint64(val.n[1])*uint64(val.n[3]) +
 930                 uint64(val.n[2])*uint64(val.n[2])
 931         t4 := m & fieldBaseMask
 932
 933         // Terms for 2^(fieldBase*5).
 934         m = (m >> fieldBase) +
 935                 2*uint64(val.n[0])*uint64(val.n[5]) +
 936                 2*uint64(val.n[1])*uint64(val.n[4]) +
 937                 2*uint64(val.n[2])*uint64(val.n[3])
 938         t5 := m & fieldBaseMask
 939
 940         // Terms for 2^(fieldBase*6).
 941         m = (m >> fieldBase) +
 942                 2*uint64(val.n[0])*uint64(val.n[6]) +
 943                 2*uint64(val.n[1])*uint64(val.n[5]) +
 944                 2*uint64(val.n[2])*uint64(val.n[4]) +
 945                 uint64(val.n[3])*uint64(val.n[3])
 946         t6 := m & fieldBaseMask
 947
 948         // Terms for 2^(fieldBase*7).
 949         m = (m >> fieldBase) +
 950                 2*uint64(val.n[0])*uint64(val.n[7]) +
 951                 2*uint64(val.n[1])*uint64(val.n[6]) +
 952                 2*uint64(val.n[2])*uint64(val.n[5]) +
 953                 2*uint64(val.n[3])*uint64(val.n[4])
 954         t7 := m & fieldBaseMask
 955
 956         // Terms for 2^(fieldBase*8).
 957         m = (m >> fieldBase) +
 958                 2*uint64(val.n[0])*uint64(val.n[8]) +
 959                 2*uint64(val.n[1])*uint64(val.n[7]) +
 960                 2*uint64(val.n[2])*uint64(val.n[6]) +
 961                 2*uint64(val.n[3])*uint64(val.n[5]) +
 962                 uint64(val.n[4])*uint64(val.n[4])
 963         t8 := m & fieldBaseMask
 964
 965         // Terms for 2^(fieldBase*9).
 966         m = (m >> fieldBase) +
 967                 2*uint64(val.n[0])*uint64(val.n[9]) +
 968                 2*uint64(val.n[1])*uint64(val.n[8]) +
 969                 2*uint64(val.n[2])*uint64(val.n[7]) +
 970                 2*uint64(val.n[3])*uint64(val.n[6]) +
 971                 2*uint64(val.n[4])*uint64(val.n[5])
 972         t9 := m & fieldBaseMask
 973
 974         // Terms for 2^(fieldBase*10).
 975         m = (m >> fieldBase) +
 976                 2*uint64(val.n[1])*uint64(val.n[9]) +
 977                 2*uint64(val.n[2])*uint64(val.n[8]) +
 978                 2*uint64(val.n[3])*uint64(val.n[7]) +
 979                 2*uint64(val.n[4])*uint64(val.n[6]) +
 980                 uint64(val.n[5])*uint64(val.n[5])
 981         t10 := m & fieldBaseMask
 982
 983         // Terms for 2^(fieldBase*11).
 984         m = (m >> fieldBase) +
 985                 2*uint64(val.n[2])*uint64(val.n[9]) +
 986                 2*uint64(val.n[3])*uint64(val.n[8]) +
 987                 2*uint64(val.n[4])*uint64(val.n[7]) +
 988                 2*uint64(val.n[5])*uint64(val.n[6])
 989         t11 := m & fieldBaseMask
 990
 991         // Terms for 2^(fieldBase*12).
 992         m = (m >> fieldBase) +
 993                 2*uint64(val.n[3])*uint64(val.n[9]) +
 994                 2*uint64(val.n[4])*uint64(val.n[8]) +
 995                 2*uint64(val.n[5])*uint64(val.n[7]) +
 996                 uint64(val.n[6])*uint64(val.n[6])
 997         t12 := m & fieldBaseMask
 998
 999         // Terms for 2^(fieldBase*13).
1000         m = (m >> fieldBase) +
1001                 2*uint64(val.n[4])*uint64(val.n[9]) +
1002                 2*uint64(val.n[5])*uint64(val.n[8]) +
1003                 2*uint64(val.n[6])*uint64(val.n[7])
1004         t13 := m & fieldBaseMask
1005
1006         // Terms for 2^(fieldBase*14).
1007         m = (m >> fieldBase) +
1008                 2*uint64(val.n[5])*uint64(val.n[9]) +
1009                 2*uint64(val.n[6])*uint64(val.n[8]) +
1010                 uint64(val.n[7])*uint64(val.n[7])
1011         t14 := m & fieldBaseMask
1012
1013         // Terms for 2^(fieldBase*15).
1014         m = (m >> fieldBase) +
1015                 2*uint64(val.n[6])*uint64(val.n[9]) +
1016                 2*uint64(val.n[7])*uint64(val.n[8])
1017         t15 := m & fieldBaseMask
1018
1019         // Terms for 2^(fieldBase*16).
1020         m = (m >> fieldBase) +
1021                 2*uint64(val.n[7])*uint64(val.n[9]) +
1022                 uint64(val.n[8])*uint64(val.n[8])
1023         t16 := m & fieldBaseMask
1024
1025         // Terms for 2^(fieldBase*17).
1026         m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9])
1027         t17 := m & fieldBaseMask
1028
1029         // Terms for 2^(fieldBase*18).
1030         m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9])
1031         t18 := m & fieldBaseMask
1032
1033         // What's left is for 2^(fieldBase*19).
1034         t19 := m >> fieldBase
1035
1036         // At this point, all of the terms are grouped into their respective
1037         // base.
1038         //
1039         // Per [HAC] section 14.3.4: Reduction method of moduli of special form,
1040         // when the modulus is of the special form m = b^t - c, highly efficient
1041         // reduction can be achieved per the provided algorithm.
1042         //
1043         // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
1044         // this criteria.
1045         //
1046         // 4294968273 in field representation (base 2^26) is:
1047         // n[0] = 977
1048         // n[1] = 64
1049         // That is to say (2^26 * 64) + 977 = 4294968273
1050         //
1051         // Since each word is in base 26, the upper terms (t10 and up) start
1052         // at 260 bits (versus the final desired range of 256 bits), so the
1053         // field representation of 'c' from above needs to be adjusted for the
1054         // extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
1055         // 68719492368.  Thus, the adjusted field representation of 'c' is:
1056         // n[0] = 977 * 16 = 15632
1057         // n[1] = 64 * 16 = 1024
1058         // That is to say (2^26 * 1024) + 15632 = 68719492368
1059         //
1060         // To reduce the final term, t19, the entire 'c' value is needed instead
1061         // of only n[0] because there are no more terms left to handle n[1].
1062         // This means there might be some magnitude left in the upper bits that
1063         // is handled below.
1064         m = t0 + t10*15632
1065         t0 = m & fieldBaseMask
1066         m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
1067         t1 = m & fieldBaseMask
1068         m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
1069         t2 = m & fieldBaseMask
1070         m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
1071         t3 = m & fieldBaseMask
1072         m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
1073         t4 = m & fieldBaseMask
1074         m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
1075         t5 = m & fieldBaseMask
1076         m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
1077         t6 = m & fieldBaseMask
1078         m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
1079         t7 = m & fieldBaseMask
1080         m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
1081         t8 = m & fieldBaseMask
1082         m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
1083         t9 = m & fieldMSBMask
1084         m = m >> fieldMSBBits
1085
1086         // At this point, if the magnitude is greater than 0, the overall value
1087         // is greater than the max possible 256-bit value.  In particular, it is
1088         // "how many times larger" than the max value it is.
1089         //
1090         // The algorithm presented in [HAC] section 14.3.4 repeats until the
1091         // quotient is zero.  However, due to the above, we already know at
1092         // least how many times we would need to repeat as it's the value
1093         // currently in m.  Thus we can simply multiply the magnitude by the
1094         // field representation of the prime and do a single iteration.  Notice
1095         // that nothing will be changed when the magnitude is zero, so we could
1096         // skip this in that case, however always running regardless allows it
1097         // to run in constant time.  The final result will be in the range
1098         // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
1099         // magnitude of 1, but it is denormalized.
1100         n := t0 + m*977
1101         f.n[0] = uint32(n & fieldBaseMask)
1102         n = (n >> fieldBase) + t1 + m*64
1103         f.n[1] = uint32(n & fieldBaseMask)
1104         f.n[2] = uint32((n >> fieldBase) + t2)
1105         f.n[3] = uint32(t3)
1106         f.n[4] = uint32(t4)
1107         f.n[5] = uint32(t5)
1108         f.n[6] = uint32(t6)
1109         f.n[7] = uint32(t7)
1110         f.n[8] = uint32(t8)
1111         f.n[9] = uint32(t9)
1112
1113         return f
1114 }
1115
1116 // Inverse finds the modular multiplicative inverse of the field value.  The
1117 // existing field value is modified.
1118 //
1119 // The field value is returned to support chaining.  This enables syntax like:
1120 // f.Inverse().Mul(f2) so that f = f^-1 * f2.
1121 func (f *fieldVal) Inverse() *fieldVal {
1122         // Fermat's little theorem states that for a nonzero number a and prime
1123         // prime p, a^(p-1) = 1 (mod p).  Since the multipliciative inverse is
1124         // a*b = 1 (mod p), it follows that b = a*a^(p-2) = a^(p-1) = 1 (mod p).
1125         // Thus, a^(p-2) is the multiplicative inverse.
1126         //
1127         // In order to efficiently compute a^(p-2), p-2 needs to be split into
1128         // a sequence of squares and multipications that minimizes the number of
1129         // multiplications needed (since they are more costly than squarings).
1130         // Intermediate results are saved and reused as well.
1131         //
1132         // The secp256k1 prime - 2 is 2^256 - 4294968275.
1133         //
1134         // This has a cost of 258 field squarings and 33 field multiplications.
1135         var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 fieldVal
1136         a2.SquareVal(f)
1137         a3.Mul2(&a2, f)
1138         a4.SquareVal(&a2)
1139         a10.SquareVal(&a4).Mul(&a2)
1140         a11.Mul2(&a10, f)
1141         a21.Mul2(&a10, &a11)
1142         a42.SquareVal(&a21)
1143         a45.Mul2(&a42, &a3)
1144         a63.Mul2(&a42, &a21)
1145         a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11)
1146         a1023.Mul2(&a1019, &a4)
1147         f.Set(&a63)                                    // f = a^(2^6 - 1)
1148         f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32)
1149         f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024)
1150         f.Mul(&a1023)                                  // f = a^(2^16 - 1)
1151         f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32)
1152         f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024)
1153         f.Mul(&a1023)                                  // f = a^(2^26 - 1)
1154         f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32)
1155         f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024)
1156         f.Mul(&a1023)                                  // f = a^(2^36 - 1)
1157         f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32)
1158         f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024)
1159         f.Mul(&a1023)                                  // f = a^(2^46 - 1)
1160         f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32)
1161         f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024)
1162         f.Mul(&a1023)                                  // f = a^(2^56 - 1)
1163         f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32)
1164         f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024)
1165         f.Mul(&a1023)                                  // f = a^(2^66 - 1)
1166         f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32)
1167         f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024)
1168         f.Mul(&a1023)                                  // f = a^(2^76 - 1)
1169         f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32)
1170         f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024)
1171         f.Mul(&a1023)                                  // f = a^(2^86 - 1)
1172         f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32)
1173         f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024)
1174         f.Mul(&a1023)                                  // f = a^(2^96 - 1)
1175         f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32)
1176         f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024)
1177         f.Mul(&a1023)                                  // f = a^(2^106 - 1)
1178         f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32)
1179         f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024)
1180         f.Mul(&a1023)                                  // f = a^(2^116 - 1)
1181         f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32)
1182         f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024)
1183         f.Mul(&a1023)                                  // f = a^(2^126 - 1)
1184         f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32)
1185         f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024)
1186         f.Mul(&a1023)                                  // f = a^(2^136 - 1)
1187         f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32)
1188         f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024)
1189         f.Mul(&a1023)                                  // f = a^(2^146 - 1)
1190         f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32)
1191         f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024)
1192         f.Mul(&a1023)                                  // f = a^(2^156 - 1)
1193         f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32)
1194         f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024)
1195         f.Mul(&a1023)                                  // f = a^(2^166 - 1)
1196         f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32)
1197         f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024)
1198         f.Mul(&a1023)                                  // f = a^(2^176 - 1)
1199         f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32)
1200         f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024)
1201         f.Mul(&a1023)                                  // f = a^(2^186 - 1)
1202         f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32)
1203         f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024)
1204         f.Mul(&a1023)                                  // f = a^(2^196 - 1)
1205         f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32)
1206         f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024)
1207         f.Mul(&a1023)                                  // f = a^(2^206 - 1)
1208         f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32)
1209         f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024)
1210         f.Mul(&a1023)                                  // f = a^(2^216 - 1)
1211         f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32)
1212         f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024)
1213         f.Mul(&a1019)                                  // f = a^(2^226 - 5)
1214         f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160)
1215         f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120)
1216         f.Mul(&a1023)                                  // f = a^(2^236 - 4097)
1217         f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104)
1218         f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328)
1219         f.Mul(&a1023)                                  // f = a^(2^246 - 4194305)
1220         f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760)
1221         f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320)
1222         return f.Mul(&a45)                             // f = a^(2^256 - 4294968275) = a^(p-2)
1223 }