crypto/sm2/p256.go

   1 package sm2
   2
   3 import (
   4         "crypto/elliptic"
   5         "math/big"
   6         "sync"
   7 )
   8
   9 /** 学习标准库p256的优化方法实现sm2的快速版本
  10  * 标准库的p256的代码实现有些晦涩难懂，当然sm2的同样如此，有兴趣的大家可以研究研究，最后神兽压阵。。。
  11  *
  12  * ━━━━━━animal━━━━━━
  13  * 　　　┏┓　　　┏┓
  14  * 　　┏┛┻━━━┛┻┓
  15  * 　　┃　　　　　　　┃
  16  * 　　┃　　　━　　　┃
  17  * 　　┃　┳┛　┗┳　┃
  18  * 　　┃　　　　　　　┃
  19  * 　　┃　　　┻　　　┃
  20  * 　　┃　　　　　　　┃
  21  * 　　┗━┓　　　┏━┛
  22  * 　　　┃　　　┃
  23  *　　 　┃　　　┃
  24  *　　　 ┃　　　┗━━━┓
  25  *         　┃　　　　　┣┓
  26  *   　　┃　　　　　┏┛
  27  *　　 　┗┓┓┏━┳┓┏┛
  28  *　　　　┃┫┫ ┃┫┫
  29  *　　　　┗┻┛ ┗┻┛
  30  *
  31  * ━━━━━Kawaii ━━━━━━
  32  */
  33
// sm2P256Curve is the curve type returned (as an elliptic.Curve) by P256Sm2.
// It embeds the generic curve parameters and additionally keeps several
// constants in the package's internal field-element representation.
type sm2P256Curve struct {
        // RInverse is a fixed constant assigned in initP256Sm2.
        // NOTE(review): presumably the inverse of the Montgomery factor R
        // modulo P - confirm against the big.Int conversion helpers.
        RInverse *big.Int
        *elliptic.CurveParams
        // a, b, gx, gy are the curve coefficients a and b and the base point
        // coordinates Gx and Gy, converted with sm2P256FromBig in initP256Sm2.
        a, b, gx, gy sm2P256FieldElement
}
  39
  40 var initonce sync.Once
  41 var sm2P256 sm2P256Curve
  42
  43 type sm2P256FieldElement [9]uint32
  44 type sm2P256LargeFieldElement [17]uint64
  45
  46 const (
  47         bottom28Bits = 0xFFFFFFF
  48         bottom29Bits = 0x1FFFFFFF
  49 )
  50
  51 func initP256Sm2() {
  52         sm2P256.CurveParams = &elliptic.CurveParams{Name: "SM2-P-256"} // sm2
  53         A, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFC", 16)
  54         //SM2椭        椭 圆 曲 线 公 钥 密 码 算 法 推 荐 曲 线 参 数
  55         sm2P256.P, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
  56         sm2P256.N, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
  57         sm2P256.B, _ = new(big.Int).SetString("28E9FA9E9D9F5E344D5A9E4BCF6509A7F39789F515AB8F92DDBCBD414D940E93", 16)
  58         sm2P256.Gx, _ = new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16)
  59         sm2P256.Gy, _ = new(big.Int).SetString("BC3736A2F4F6779C59BDCEE36B692153D0A9877CC62A474002DF32E52139F0A0", 16)
  60         sm2P256.RInverse, _ = new(big.Int).SetString("7ffffffd80000002fffffffe000000017ffffffe800000037ffffffc80000002", 16)
  61         sm2P256.BitSize = 256
  62         sm2P256FromBig(&sm2P256.a, A)
  63         sm2P256FromBig(&sm2P256.gx, sm2P256.Gx)
  64         sm2P256FromBig(&sm2P256.gy, sm2P256.Gy)
  65         sm2P256FromBig(&sm2P256.b, sm2P256.B)
  66 }
  67
  68 func P256Sm2() elliptic.Curve {
  69         initonce.Do(initP256Sm2)
  70         return sm2P256
  71 }
  72
  73 func (curve sm2P256Curve) Params() *elliptic.CurveParams {
  74         return sm2P256.CurveParams
  75 }
  76
  77 // y^2 = x^3 + ax + b
  78 func (curve sm2P256Curve) IsOnCurve(X, Y *big.Int) bool {
  79         var a, x, y, y2, x3 sm2P256FieldElement
  80
  81         sm2P256FromBig(&x, X)
  82         sm2P256FromBig(&y, Y)
  83
  84         sm2P256Square(&x3, &x)       // x3 = x ^ 2
  85         sm2P256Mul(&x3, &x3, &x)     // x3 = x ^ 2 * x
  86         sm2P256Mul(&a, &curve.a, &x) // a = a * x
  87         sm2P256Add(&x3, &x3, &a)
  88         sm2P256Add(&x3, &x3, &curve.b)
  89
  90         sm2P256Square(&y2, &y) // y2 = y ^ 2
  91         return sm2P256ToBig(&x3).Cmp(sm2P256ToBig(&y2)) == 0
  92 }
  93
  94 func zForAffine(x, y *big.Int) *big.Int {
  95         z := new(big.Int)
  96         if x.Sign() != 0 || y.Sign() != 0 {
  97                 z.SetInt64(1)
  98         }
  99         return z
 100 }
 101
 102 func (curve sm2P256Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
 103         var X1, Y1, Z1, X2, Y2, Z2, X3, Y3, Z3 sm2P256FieldElement
 104
 105         z1 := zForAffine(x1, y1)
 106         z2 := zForAffine(x2, y2)
 107         sm2P256FromBig(&X1, x1)
 108         sm2P256FromBig(&Y1, y1)
 109         sm2P256FromBig(&Z1, z1)
 110         sm2P256FromBig(&X2, x2)
 111         sm2P256FromBig(&Y2, y2)
 112         sm2P256FromBig(&Z2, z2)
 113         sm2P256PointAdd(&X1, &Y1, &Z1, &X2, &Y2, &Z2, &X3, &Y3, &Z3)
 114         return sm2P256ToAffine(&X3, &Y3, &Z3)
 115 }
 116
 117 func (curve sm2P256Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
 118         var X1, Y1, Z1 sm2P256FieldElement
 119
 120         z1 := zForAffine(x1, y1)
 121         sm2P256FromBig(&X1, x1)
 122         sm2P256FromBig(&Y1, y1)
 123         sm2P256FromBig(&Z1, z1)
 124         sm2P256PointDouble(&X1, &Y1, &Z1, &X1, &Y1, &Z1)
 125         return sm2P256ToAffine(&X1, &Y1, &Z1)
 126 }
 127
 128 func (curve sm2P256Curve) ScalarMult(x1, y1 *big.Int, k []byte) (*big.Int, *big.Int) {
 129         var scalarReversed [32]byte
 130         var X, Y, Z, X1, Y1 sm2P256FieldElement
 131
 132         sm2P256FromBig(&X1, x1)
 133         sm2P256FromBig(&Y1, y1)
 134         sm2P256GetScalar(&scalarReversed, k)
 135         sm2P256ScalarMult(&X, &Y, &Z, &X1, &Y1, &scalarReversed)
 136         return sm2P256ToAffine(&X, &Y, &Z)
 137 }
 138
 139 func (curve sm2P256Curve) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {
 140         var scalarReversed [32]byte
 141         var X, Y, Z sm2P256FieldElement
 142
 143         sm2P256GetScalar(&scalarReversed, k)
 144         sm2P256ScalarBaseMult(&X, &Y, &Z, &scalarReversed)
 145         return sm2P256ToAffine(&X, &Y, &Z)
 146 }
 147
// sm2P256Precomputed is the lookup table consumed by sm2P256ScalarBaseMult.
// It consists of 2 sub-tables of 15 affine points each; every point is stored
// as an x coordinate followed by a y coordinate, 9 limbs apiece, giving
// 9 * 2 * 15 * 2 words in total. sm2P256SelectAffinePoint returns entry
// index-1 of a sub-table for index 1..15; index 0 (the point at infinity) is
// implicit and not stored.
// NOTE(review): presumably entry k of the first sub-table is
// (k0 + k1*2^64 + k2*2^128 + k3*2^192)*G for the bits k0..k3 of k, and the
// second sub-table holds the same multiples scaled by 2^32 - confirm against
// the generator that produced these values.
var sm2P256Precomputed = [9 * 2 * 15 * 2]uint32{
        0x830053d, 0x328990f, 0x6c04fe1, 0xc0f72e5, 0x1e19f3c, 0x666b093, 0x175a87b, 0xec38276, 0x222cf4b,
        0x185a1bba, 0x354e593, 0x1295fac1, 0xf2bc469, 0x47c60fa, 0xc19b8a9, 0xf63533e, 0x903ae6b, 0xc79acba,
        0x15b061a4, 0x33e020b, 0xdffb34b, 0xfcf2c8, 0x16582e08, 0x262f203, 0xfb34381, 0xa55452, 0x604f0ff,
        0x41f1f90, 0xd64ced2, 0xee377bf, 0x75f05f0, 0x189467ae, 0xe2244e, 0x1e7700e8, 0x3fbc464, 0x9612d2e,
        0x1341b3b8, 0xee84e23, 0x1edfa5b4, 0x14e6030, 0x19e87be9, 0x92f533c, 0x1665d96c, 0x226653e, 0xa238d3e,
        0xf5c62c, 0x95bb7a, 0x1f0e5a41, 0x28789c3, 0x1f251d23, 0x8726609, 0xe918910, 0x8096848, 0xf63d028,
        0x152296a1, 0x9f561a8, 0x14d376fb, 0x898788a, 0x61a95fb, 0xa59466d, 0x159a003d, 0x1ad1698, 0x93cca08,
        0x1b314662, 0x706e006, 0x11ce1e30, 0x97b710, 0x172fbc0d, 0x8f50158, 0x11c7ffe7, 0xd182cce, 0xc6ad9e8,
        0x12ea31b2, 0xc4e4f38, 0x175b0d96, 0xec06337, 0x75a9c12, 0xb001fdf, 0x93e82f5, 0x34607de, 0xb8035ed,
        0x17f97924, 0x75cf9e6, 0xdceaedd, 0x2529924, 0x1a10c5ff, 0xb1a54dc, 0x19464d8, 0x2d1997, 0xde6a110,
        0x1e276ee5, 0x95c510c, 0x1aca7c7a, 0xfe48aca, 0x121ad4d9, 0xe4132c6, 0x8239b9d, 0x40ea9cd, 0x816c7b,
        0x632d7a4, 0xa679813, 0x5911fcf, 0x82b0f7c, 0x57b0ad5, 0xbef65, 0xd541365, 0x7f9921f, 0xc62e7a,
        0x3f4b32d, 0x58e50e1, 0x6427aed, 0xdcdda67, 0xe8c2d3e, 0x6aa54a4, 0x18df4c35, 0x49a6a8e, 0x3cd3d0c,
        0xd7adf2, 0xcbca97, 0x1bda5f2d, 0x3258579, 0x606b1e6, 0x6fc1b5b, 0x1ac27317, 0x503ca16, 0xa677435,
        0x57bc73, 0x3992a42, 0xbab987b, 0xfab25eb, 0x128912a4, 0x90a1dc4, 0x1402d591, 0x9ffbcfc, 0xaa48856,
        0x7a7c2dc, 0xcefd08a, 0x1b29bda6, 0xa785641, 0x16462d8c, 0x76241b7, 0x79b6c3b, 0x204ae18, 0xf41212b,
        0x1f567a4d, 0xd6ce6db, 0xedf1784, 0x111df34, 0x85d7955, 0x55fc189, 0x1b7ae265, 0xf9281ac, 0xded7740,
        0xf19468b, 0x83763bb, 0x8ff7234, 0x3da7df8, 0x9590ac3, 0xdc96f2a, 0x16e44896, 0x7931009, 0x99d5acc,
        0x10f7b842, 0xaef5e84, 0xc0310d7, 0xdebac2c, 0x2a7b137, 0x4342344, 0x19633649, 0x3a10624, 0x4b4cb56,
        0x1d809c59, 0xac007f, 0x1f0f4bcd, 0xa1ab06e, 0xc5042cf, 0x82c0c77, 0x76c7563, 0x22c30f3, 0x3bf1568,
        0x7a895be, 0xfcca554, 0x12e90e4c, 0x7b4ab5f, 0x13aeb76b, 0x5887e2c, 0x1d7fe1e3, 0x908c8e3, 0x95800ee,
        0xb36bd54, 0xf08905d, 0x4e73ae8, 0xf5a7e48, 0xa67cb0, 0x50e1067, 0x1b944a0a, 0xf29c83a, 0xb23cfb9,
        0xbe1db1, 0x54de6e8, 0xd4707f2, 0x8ebcc2d, 0x2c77056, 0x1568ce4, 0x15fcc849, 0x4069712, 0xe2ed85f,
        0x2c5ff09, 0x42a6929, 0x628e7ea, 0xbd5b355, 0xaf0bd79, 0xaa03699, 0xdb99816, 0x4379cef, 0x81d57b,
        0x11237f01, 0xe2a820b, 0xfd53b95, 0x6beb5ee, 0x1aeb790c, 0xe470d53, 0x2c2cfee, 0x1c1d8d8, 0xa520fc4,
        0x1518e034, 0xa584dd4, 0x29e572b, 0xd4594fc, 0x141a8f6f, 0x8dfccf3, 0x5d20ba3, 0x2eb60c3, 0x9f16eb0,
        0x11cec356, 0xf039f84, 0x1b0990c1, 0xc91e526, 0x10b65bae, 0xf0616e8, 0x173fa3ff, 0xec8ccf9, 0xbe32790,
        0x11da3e79, 0xe2f35c7, 0x908875c, 0xdacf7bd, 0x538c165, 0x8d1487f, 0x7c31aed, 0x21af228, 0x7e1689d,
        0xdfc23ca, 0x24f15dc, 0x25ef3c4, 0x35248cd, 0x99a0f43, 0xa4b6ecc, 0xd066b3, 0x2481152, 0x37a7688,
        0x15a444b6, 0xb62300c, 0x4b841b, 0xa655e79, 0xd53226d, 0xbeb348a, 0x127f3c2, 0xb989247, 0x71a277d,
        0x19e9dfcb, 0xb8f92d0, 0xe2d226c, 0x390a8b0, 0x183cc462, 0x7bd8167, 0x1f32a552, 0x5e02db4, 0xa146ee9,
        0x1a003957, 0x1c95f61, 0x1eeec155, 0x26f811f, 0xf9596ba, 0x3082bfb, 0x96df083, 0x3e3a289, 0x7e2d8be,
        0x157a63e0, 0x99b8941, 0x1da7d345, 0xcc6cd0, 0x10beed9a, 0x48e83c0, 0x13aa2e25, 0x7cad710, 0x4029988,
        0x13dfa9dd, 0xb94f884, 0x1f4adfef, 0xb88543, 0x16f5f8dc, 0xa6a67f4, 0x14e274e2, 0x5e56cf4, 0x2f24ef,
        0x1e9ef967, 0xfe09bad, 0xfe079b3, 0xcc0ae9e, 0xb3edf6d, 0x3e961bc, 0x130d7831, 0x31043d6, 0xba986f9,
        0x1d28055, 0x65240ca, 0x4971fa3, 0x81b17f8, 0x11ec34a5, 0x8366ddc, 0x1471809, 0xfa5f1c6, 0xc911e15,
        0x8849491, 0xcf4c2e2, 0x14471b91, 0x39f75be, 0x445c21e, 0xf1585e9, 0x72cc11f, 0x4c79f0c, 0xe5522e1,
        0x1874c1ee, 0x4444211, 0x7914884, 0x3d1b133, 0x25ba3c, 0x4194f65, 0x1c0457ef, 0xac4899d, 0xe1fa66c,
        0x130a7918, 0x9b8d312, 0x4b1c5c8, 0x61ccac3, 0x18c8aa6f, 0xe93cb0a, 0xdccb12c, 0xde10825, 0x969737d,
        0xf58c0c3, 0x7cee6a9, 0xc2c329a, 0xc7f9ed9, 0x107b3981, 0x696a40e, 0x152847ff, 0x4d88754, 0xb141f47,
        0x5a16ffe, 0x3a7870a, 0x18667659, 0x3b72b03, 0xb1c9435, 0x9285394, 0xa00005a, 0x37506c, 0x2edc0bb,
        0x19afe392, 0xeb39cac, 0x177ef286, 0xdf87197, 0x19f844ed, 0x31fe8, 0x15f9bfd, 0x80dbec, 0x342e96e,
        0x497aced, 0xe88e909, 0x1f5fa9ba, 0x530a6ee, 0x1ef4e3f1, 0x69ffd12, 0x583006d, 0x2ecc9b1, 0x362db70,
        0x18c7bdc5, 0xf4bb3c5, 0x1c90b957, 0xf067c09, 0x9768f2b, 0xf73566a, 0x1939a900, 0x198c38a, 0x202a2a1,
        0x4bbf5a6, 0x4e265bc, 0x1f44b6e7, 0x185ca49, 0xa39e81b, 0x24aff5b, 0x4acc9c2, 0x638bdd3, 0xb65b2a8,
        0x6def8be, 0xb94537a, 0x10b81dee, 0xe00ec55, 0x2f2cdf7, 0xc20622d, 0x2d20f36, 0xe03c8c9, 0x898ea76,
        0x8e3921b, 0x8905bff, 0x1e94b6c8, 0xee7ad86, 0x154797f2, 0xa620863, 0x3fbd0d9, 0x1f3caab, 0x30c24bd,
        0x19d3892f, 0x59c17a2, 0x1ab4b0ae, 0xf8714ee, 0x90c4098, 0xa9c800d, 0x1910236b, 0xea808d3, 0x9ae2f31,
        0x1a15ad64, 0xa48c8d1, 0x184635a4, 0xb725ef1, 0x11921dcc, 0x3f866df, 0x16c27568, 0xbdf580a, 0xb08f55c,
        0x186ee1c, 0xb1627fa, 0x34e82f6, 0x933837e, 0xf311be5, 0xfedb03b, 0x167f72cd, 0xa5469c0, 0x9c82531,
        0xb92a24b, 0x14fdc8b, 0x141980d1, 0xbdc3a49, 0x7e02bb1, 0xaf4e6dd, 0x106d99e1, 0xd4616fc, 0x93c2717,
        0x1c0a0507, 0xc6d5fed, 0x9a03d8b, 0xa1d22b0, 0x127853e3, 0xc4ac6b8, 0x1a048cf7, 0x9afb72c, 0x65d485d,
        0x72d5998, 0xe9fa744, 0xe49e82c, 0x253cf80, 0x5f777ce, 0xa3799a5, 0x17270cbb, 0xc1d1ef0, 0xdf74977,
        0x114cb859, 0xfa8e037, 0xb8f3fe5, 0xc734cc6, 0x70d3d61, 0xeadac62, 0x12093dd0, 0x9add67d, 0x87200d6,
        0x175bcbb, 0xb29b49f, 0x1806b79c, 0x12fb61f, 0x170b3a10, 0x3aaf1cf, 0xa224085, 0x79d26af, 0x97759e2,
        0x92e19f1, 0xb32714d, 0x1f00d9f1, 0xc728619, 0x9e6f627, 0xe745e24, 0x18ea4ace, 0xfc60a41, 0x125f5b2,
        0xc3cf512, 0x39ed486, 0xf4d15fa, 0xf9167fd, 0x1c1f5dd5, 0xc21a53e, 0x1897930, 0x957a112, 0x21059a0,
        0x1f9e3ddc, 0xa4dfced, 0x8427f6f, 0x726fbe7, 0x1ea658f8, 0x2fdcd4c, 0x17e9b66f, 0xb2e7c2e, 0x39923bf,
        0x1bae104, 0x3973ce5, 0xc6f264c, 0x3511b84, 0x124195d7, 0x11996bd, 0x20be23d, 0xdc437c4, 0x4b4f16b,
        0x11902a0, 0x6c29cc9, 0x1d5ffbe6, 0xdb0b4c7, 0x10144c14, 0x2f2b719, 0x301189, 0x2343336, 0xa0bf2ac,
}
 210
 211 func sm2P256GetScalar(b *[32]byte, a []byte) {
 212         var scalarBytes []byte
 213
 214         n := new(big.Int).SetBytes(a)
 215         if n.Cmp(sm2P256.N) >= 0 {
 216                 n.Mod(n, sm2P256.N)
 217                 scalarBytes = n.Bytes()
 218         } else {
 219                 scalarBytes = a
 220         }
 221         for i, v := range scalarBytes {
 222                 b[len(scalarBytes)-(1+i)] = v
 223         }
 224 }
 225
 226 func sm2P256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *sm2P256FieldElement) {
 227         var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp sm2P256FieldElement
 228
 229         sm2P256Square(&z1z1, z1)
 230         sm2P256Add(&tmp, z1, z1)
 231
 232         sm2P256Mul(&u2, x2, &z1z1)
 233         sm2P256Mul(&z1z1z1, z1, &z1z1)
 234         sm2P256Mul(&s2, y2, &z1z1z1)
 235         sm2P256Sub(&h, &u2, x1)
 236         sm2P256Add(&i, &h, &h)
 237         sm2P256Square(&i, &i)
 238         sm2P256Mul(&j, &h, &i)
 239         sm2P256Sub(&r, &s2, y1)
 240         sm2P256Add(&r, &r, &r)
 241         sm2P256Mul(&v, x1, &i)
 242
 243         sm2P256Mul(zOut, &tmp, &h)
 244         sm2P256Square(&rr, &r)
 245         sm2P256Sub(xOut, &rr, &j)
 246         sm2P256Sub(xOut, xOut, &v)
 247         sm2P256Sub(xOut, xOut, &v)
 248
 249         sm2P256Sub(&tmp, &v, xOut)
 250         sm2P256Mul(yOut, &tmp, &r)
 251         sm2P256Mul(&tmp, y1, &j)
 252         sm2P256Sub(yOut, yOut, &tmp)
 253         sm2P256Sub(yOut, yOut, &tmp)
 254 }
 255
 256 // sm2P256CopyConditional sets out=in if mask = 0xffffffff in constant time.
 257 //
 258 // On entry: mask is either 0 or 0xffffffff.
 259 func sm2P256CopyConditional(out, in *sm2P256FieldElement, mask uint32) {
 260         for i := 0; i < 9; i++ {
 261                 tmp := mask & (in[i] ^ out[i])
 262                 out[i] ^= tmp
 263         }
 264 }
 265
 266 // sm2P256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table.
 267 // On entry: index < 16, table[0] must be zero.
 268 func sm2P256SelectAffinePoint(xOut, yOut *sm2P256FieldElement, table []uint32, index uint32) {
 269         for i := range xOut {
 270                 xOut[i] = 0
 271         }
 272         for i := range yOut {
 273                 yOut[i] = 0
 274         }
 275
 276         for i := uint32(1); i < 16; i++ {
 277                 mask := i ^ index
 278                 mask |= mask >> 2
 279                 mask |= mask >> 1
 280                 mask &= 1
 281                 mask--
 282                 for j := range xOut {
 283                         xOut[j] |= table[0] & mask
 284                         table = table[1:]
 285                 }
 286                 for j := range yOut {
 287                         yOut[j] |= table[0] & mask
 288                         table = table[1:]
 289                 }
 290         }
 291 }
 292
 293 // sm2P256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of
 294 // table.
 295 // On entry: index < 16, table[0] must be zero.
 296 func sm2P256SelectJacobianPoint(xOut, yOut, zOut *sm2P256FieldElement, table *[16][3]sm2P256FieldElement, index uint32) {
 297         for i := range xOut {
 298                 xOut[i] = 0
 299         }
 300         for i := range yOut {
 301                 yOut[i] = 0
 302         }
 303         for i := range zOut {
 304                 zOut[i] = 0
 305         }
 306
 307         // The implicit value at index 0 is all zero. We don't need to perform that
 308         // iteration of the loop because we already set out_* to zero.
 309         for i := uint32(1); i < 16; i++ {
 310                 mask := i ^ index
 311                 mask |= mask >> 2
 312                 mask |= mask >> 1
 313                 mask &= 1
 314                 mask--
 315                 for j := range xOut {
 316                         xOut[j] |= table[i][0][j] & mask
 317                 }
 318                 for j := range yOut {
 319                         yOut[j] |= table[i][1][j] & mask
 320                 }
 321                 for j := range zOut {
 322                         zOut[j] |= table[i][2][j] & mask
 323                 }
 324         }
 325 }
 326
 327 // sm2P256GetBit returns the bit'th bit of scalar.
 328 func sm2P256GetBit(scalar *[32]uint8, bit uint) uint32 {
 329         return uint32(((scalar[bit>>3]) >> (bit & 7)) & 1)
 330 }
 331
 332 // sm2P256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a
 333 // little-endian number. Note that the value of scalar must be less than the
 334 // order of the group.
 335 func sm2P256ScalarBaseMult(xOut, yOut, zOut *sm2P256FieldElement, scalar *[32]uint8) {
 336         nIsInfinityMask := ^uint32(0)
 337         var px, py, tx, ty, tz sm2P256FieldElement
 338         var pIsNoninfiniteMask, mask, tableOffset uint32
 339
 340         for i := range xOut {
 341                 xOut[i] = 0
 342         }
 343         for i := range yOut {
 344                 yOut[i] = 0
 345         }
 346         for i := range zOut {
 347                 zOut[i] = 0
 348         }
 349
 350         // The loop adds bits at positions 0, 64, 128 and 192, followed by
 351         // positions 32,96,160 and 224 and does this 32 times.
 352         for i := uint(0); i < 32; i++ {
 353                 if i != 0 {
 354                         sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
 355                 }
 356                 tableOffset = 0
 357                 for j := uint(0); j <= 32; j += 32 {
 358                         bit0 := sm2P256GetBit(scalar, 31-i+j)
 359                         bit1 := sm2P256GetBit(scalar, 95-i+j)
 360                         bit2 := sm2P256GetBit(scalar, 159-i+j)
 361                         bit3 := sm2P256GetBit(scalar, 223-i+j)
 362                         index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3)
 363
 364                         sm2P256SelectAffinePoint(&px, &py, sm2P256Precomputed[tableOffset:], index)
 365                         tableOffset += 30 * 9
 366
 367                         // Since scalar is less than the order of the group, we know that
 368                         // {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we handle
 369                         // below.
 370                         sm2P256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py)
 371                         // The result of pointAddMixed is incorrect if {xOut,yOut,zOut} is zero
 372                         // (a.k.a.  the point at infinity). We handle that situation by
 373                         // copying the point from the table.
 374                         sm2P256CopyConditional(xOut, &px, nIsInfinityMask)
 375                         sm2P256CopyConditional(yOut, &py, nIsInfinityMask)
 376                         sm2P256CopyConditional(zOut, &sm2P256Factor[1], nIsInfinityMask)
 377
 378                         // Equally, the result is also wrong if the point from the table is
 379                         // zero, which happens when the index is zero. We handle that by
 380                         // only copying from {tx,ty,tz} to {xOut,yOut,zOut} if index != 0.
 381                         pIsNoninfiniteMask = nonZeroToAllOnes(index)
 382                         mask = pIsNoninfiniteMask & ^nIsInfinityMask
 383                         sm2P256CopyConditional(xOut, &tx, mask)
 384                         sm2P256CopyConditional(yOut, &ty, mask)
 385                         sm2P256CopyConditional(zOut, &tz, mask)
 386                         // If p was not zero, then n is now non-zero.
 387                         nIsInfinityMask &^= pIsNoninfiniteMask
 388                 }
 389         }
 390 }
 391
 392 func sm2P256ScalarMult(xOut, yOut, zOut, x, y *sm2P256FieldElement, scalar *[32]uint8) {
 393         var precomp [16][3]sm2P256FieldElement
 394         var px, py, pz, tx, ty, tz sm2P256FieldElement
 395         var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32
 396
 397         // We precompute 0,1,2,... times {x,y}.
 398         precomp[1][0] = *x
 399         precomp[1][1] = *y
 400         precomp[1][2] = sm2P256Factor[1]
 401
 402         for i := 2; i < 16; i += 2 {
 403                 sm2P256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2])
 404                 sm2P256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i+1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y)
 405         }
 406
 407         for i := range xOut {
 408                 xOut[i] = 0
 409         }
 410         for i := range yOut {
 411                 yOut[i] = 0
 412         }
 413         for i := range zOut {
 414                 zOut[i] = 0
 415         }
 416         nIsInfinityMask = ^uint32(0)
 417
 418         // We add in a window of four bits each iteration and do this 64 times.
 419         for i := 0; i < 64; i++ {
 420                 if i != 0 {
 421                         sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
 422                         sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
 423                         sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
 424                         sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
 425                 }
 426
 427                 index = uint32(scalar[31-i/2])
 428                 if (i & 1) == 1 {
 429                         index &= 15
 430                 } else {
 431                         index >>= 4
 432                 }
 433
 434                 // See the comments in scalarBaseMult about handling infinities.
 435                 sm2P256SelectJacobianPoint(&px, &py, &pz, &precomp, index)
 436                 sm2P256PointAdd(xOut, yOut, zOut, &px, &py, &pz, &tx, &ty, &tz)
 437                 sm2P256CopyConditional(xOut, &px, nIsInfinityMask)
 438                 sm2P256CopyConditional(yOut, &py, nIsInfinityMask)
 439                 sm2P256CopyConditional(zOut, &pz, nIsInfinityMask)
 440
 441                 pIsNoninfiniteMask = nonZeroToAllOnes(index)
 442                 mask = pIsNoninfiniteMask & ^nIsInfinityMask
 443                 sm2P256CopyConditional(xOut, &tx, mask)
 444                 sm2P256CopyConditional(yOut, &ty, mask)
 445                 sm2P256CopyConditional(zOut, &tz, mask)
 446                 nIsInfinityMask &^= pIsNoninfiniteMask
 447         }
 448 }
 449
 450 func sm2P256PointToAffine(xOut, yOut, x, y, z *sm2P256FieldElement) {
 451         var zInv, zInvSq sm2P256FieldElement
 452
 453         zz := sm2P256ToBig(z)
 454         zz.ModInverse(zz, sm2P256.P)
 455         sm2P256FromBig(&zInv, zz)
 456
 457         sm2P256Square(&zInvSq, &zInv)
 458         sm2P256Mul(xOut, x, &zInvSq)
 459         sm2P256Mul(&zInv, &zInv, &zInvSq)
 460         sm2P256Mul(yOut, y, &zInv)
 461 }
 462
 463 func sm2P256ToAffine(x, y, z *sm2P256FieldElement) (xOut, yOut *big.Int) {
 464         var xx, yy sm2P256FieldElement
 465
 466         sm2P256PointToAffine(&xx, &yy, x, y, z)
 467         return sm2P256ToBig(&xx), sm2P256ToBig(&yy)
 468 }
 469
 470 var sm2P256Factor = []sm2P256FieldElement{
 471         sm2P256FieldElement{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
 472         sm2P256FieldElement{0x2, 0x0, 0x1FFFFF00, 0x7FF, 0x0, 0x0, 0x0, 0x2000000, 0x0},
 473         sm2P256FieldElement{0x4, 0x0, 0x1FFFFE00, 0xFFF, 0x0, 0x0, 0x0, 0x4000000, 0x0},
 474         sm2P256FieldElement{0x6, 0x0, 0x1FFFFD00, 0x17FF, 0x0, 0x0, 0x0, 0x6000000, 0x0},
 475         sm2P256FieldElement{0x8, 0x0, 0x1FFFFC00, 0x1FFF, 0x0, 0x0, 0x0, 0x8000000, 0x0},
 476         sm2P256FieldElement{0xA, 0x0, 0x1FFFFB00, 0x27FF, 0x0, 0x0, 0x0, 0xA000000, 0x0},
 477         sm2P256FieldElement{0xC, 0x0, 0x1FFFFA00, 0x2FFF, 0x0, 0x0, 0x0, 0xC000000, 0x0},
 478         sm2P256FieldElement{0xE, 0x0, 0x1FFFF900, 0x37FF, 0x0, 0x0, 0x0, 0xE000000, 0x0},
 479         sm2P256FieldElement{0x10, 0x0, 0x1FFFF800, 0x3FFF, 0x0, 0x0, 0x0, 0x0, 0x01},
 480 }
 481
 482 func sm2P256Scalar(b *sm2P256FieldElement, a int) {
 483         sm2P256Mul(b, b, &sm2P256Factor[a])
 484 }
 485
 486 // (x3, y3, z3) = (x1, y1, z1) + (x2, y2, z2)
 487 func sm2P256PointAdd(x1, y1, z1, x2, y2, z2, x3, y3, z3 *sm2P256FieldElement) {
 488         var u1, u2, z22, z12, z23, z13, s1, s2, h, h2, r, r2, tm sm2P256FieldElement
 489
 490         if sm2P256ToBig(z1).Sign() == 0 {
 491                 sm2P256Dup(x3, x2)
 492                 sm2P256Dup(y3, y2)
 493                 sm2P256Dup(z3, z2)
 494                 return
 495         }
 496
 497         if sm2P256ToBig(z2).Sign() == 0 {
 498                 sm2P256Dup(x3, x1)
 499                 sm2P256Dup(y3, y1)
 500                 sm2P256Dup(z3, z1)
 501                 return
 502         }
 503
 504         sm2P256Square(&z12, z1) // z12 = z1 ^ 2
 505         sm2P256Square(&z22, z2) // z22 = z2 ^ 2
 506
 507         sm2P256Mul(&z13, &z12, z1) // z13 = z1 ^ 3
 508         sm2P256Mul(&z23, &z22, z2) // z23 = z2 ^ 3
 509
 510         sm2P256Mul(&u1, x1, &z22) // u1 = x1 * z2 ^ 2
 511         sm2P256Mul(&u2, x2, &z12) // u2 = x2 * z1 ^ 2
 512
 513         sm2P256Mul(&s1, y1, &z23) // s1 = y1 * z2 ^ 3
 514         sm2P256Mul(&s2, y2, &z13) // s2 = y2 * z1 ^ 3
 515
 516         if sm2P256ToBig(&u1).Cmp(sm2P256ToBig(&u2)) == 0 &&
 517                 sm2P256ToBig(&s1).Cmp(sm2P256ToBig(&s2)) == 0 {
 518                 sm2P256PointDouble(x1, y1, z1, x1, y1, z1)
 519         }
 520
 521         sm2P256Sub(&h, &u2, &u1) // h = u2 - u1
 522         sm2P256Sub(&r, &s2, &s1) // r = s2 - s1
 523
 524         sm2P256Square(&r2, &r) // r2 = r ^ 2
 525         sm2P256Square(&h2, &h) // h2 = h ^ 2
 526
 527         sm2P256Mul(&tm, &h2, &h) // tm = h ^ 3
 528         sm2P256Sub(x3, &r2, &tm)
 529         sm2P256Mul(&tm, &u1, &h2)
 530         sm2P256Scalar(&tm, 2)   // tm = 2 * (u1 * h ^ 2)
 531         sm2P256Sub(x3, x3, &tm) // x3 = r ^ 2 - h ^ 3 - 2 * u1 * h ^ 2
 532
 533         sm2P256Mul(&tm, &u1, &h2) // tm = u1 * h ^ 2
 534         sm2P256Sub(&tm, &tm, x3)  // tm = u1 * h ^ 2 - x3
 535         sm2P256Mul(y3, &r, &tm)
 536         sm2P256Mul(&tm, &h2, &h)  // tm = h ^ 3
 537         sm2P256Mul(&tm, &tm, &s1) // tm = s1 * h ^ 3
 538         sm2P256Sub(y3, y3, &tm)   // y3 = r * (u1 * h ^ 2 - x3) - s1 * h ^ 3
 539
 540         sm2P256Mul(z3, z1, z2)
 541         sm2P256Mul(z3, z3, &h) // z3 = z1 * z3 * h
 542 }
 543
 544 func sm2P256PointDouble(x3, y3, z3, x, y, z *sm2P256FieldElement) {
 545         var s, m, m2, x2, y2, z2, z4, y4, az4 sm2P256FieldElement
 546
 547         sm2P256Square(&x2, x) // x2 = x ^ 2
 548         sm2P256Square(&y2, y) // y2 = y ^ 2
 549         sm2P256Square(&z2, z) // z2 = z ^ 2
 550
 551         sm2P256Square(&z4, z)   // z4 = z ^ 2
 552         sm2P256Mul(&z4, &z4, z) // z4 = z ^ 3
 553         sm2P256Mul(&z4, &z4, z) // z4 = z ^ 4
 554
 555         sm2P256Square(&y4, y)   // y4 = y ^ 2
 556         sm2P256Mul(&y4, &y4, y) // y4 = y ^ 3
 557         sm2P256Mul(&y4, &y4, y) // y4 = y ^ 4
 558         sm2P256Scalar(&y4, 8)   // y4 = 8 * y ^ 4
 559
 560         sm2P256Mul(&s, x, &y2)
 561         sm2P256Scalar(&s, 4) // s = 4 * x * y ^ 2
 562
 563         sm2P256Dup(&m, &x2)
 564         sm2P256Scalar(&m, 3)
 565         sm2P256Mul(&az4, &sm2P256.a, &z4)
 566         sm2P256Add(&m, &m, &az4) // m = 3 * x ^ 2 + a * z ^ 4
 567
 568         sm2P256Square(&m2, &m) // m2 = m ^ 2
 569
 570         sm2P256Add(z3, y, z)
 571         sm2P256Square(z3, z3)
 572         sm2P256Sub(z3, z3, &z2)
 573         sm2P256Sub(z3, z3, &y2) // z' = (y + z) ^2 - z ^ 2 - y ^ 2
 574
 575         sm2P256Sub(x3, &m2, &s)
 576         sm2P256Sub(x3, x3, &s) // x' = m2 - 2 * s
 577
 578         sm2P256Sub(y3, &s, x3)
 579         sm2P256Mul(y3, y3, &m)
 580         sm2P256Sub(y3, y3, &y4) // y' = m * (s - x') - 8 * y ^ 4
 581 }
 582
// sm2P256Zero31 is 0 mod p. sm2P256Sub adds it limb by limb to a - b before
// propagating carries.
// NOTE(review): presumably its limbs are large enough that the limb-wise
// difference never wraps below zero - confirm against the limb bounds.
var sm2P256Zero31 = sm2P256FieldElement{0x7FFFFFF8, 0x3FFFFFFC, 0x800003FC, 0x3FFFDFFC, 0x7FFFFFFC, 0x3FFFFFFC, 0x7FFFFFFC, 0x37FFFFFC, 0x7FFFFFFC}
 585
 586 // c = a + b
 587 func sm2P256Add(c, a, b *sm2P256FieldElement) {
 588         carry := uint32(0)
 589         for i := 0; ; i++ {
 590                 c[i] = a[i] + b[i]
 591                 c[i] += carry
 592                 carry = c[i] >> 29
 593                 c[i] &= bottom29Bits
 594                 i++
 595                 if i == 9 {
 596                         break
 597                 }
 598                 c[i] = a[i] + b[i]
 599                 c[i] += carry
 600                 carry = c[i] >> 28
 601                 c[i] &= bottom28Bits
 602         }
 603         sm2P256ReduceCarry(c, carry)
 604 }
 605
 606 // c = a - b
 607 func sm2P256Sub(c, a, b *sm2P256FieldElement) {
 608         var carry uint32
 609
 610         for i := 0; ; i++ {
 611                 c[i] = a[i] - b[i]
 612                 c[i] += sm2P256Zero31[i]
 613                 c[i] += carry
 614                 carry = c[i] >> 29
 615                 c[i] &= bottom29Bits
 616                 i++
 617                 if i == 9 {
 618                         break
 619                 }
 620                 c[i] = a[i] - b[i]
 621                 c[i] += sm2P256Zero31[i]
 622                 c[i] += carry
 623                 carry = c[i] >> 28
 624                 c[i] &= bottom28Bits
 625         }
 626         sm2P256ReduceCarry(c, carry)
 627 }
 628
 629 // c = a * b
 630 func sm2P256Mul(c, a, b *sm2P256FieldElement) {
 631         var tmp sm2P256LargeFieldElement
 632
 633         tmp[0] = uint64(a[0]) * uint64(b[0])
 634         tmp[1] = uint64(a[0])*(uint64(b[1])<<0) +
 635                 uint64(a[1])*(uint64(b[0])<<0)
 636         tmp[2] = uint64(a[0])*(uint64(b[2])<<0) +
 637                 uint64(a[1])*(uint64(b[1])<<1) +
 638                 uint64(a[2])*(uint64(b[0])<<0)
 639         tmp[3] = uint64(a[0])*(uint64(b[3])<<0) +
 640                 uint64(a[1])*(uint64(b[2])<<0) +
 641                 uint64(a[2])*(uint64(b[1])<<0) +
 642                 uint64(a[3])*(uint64(b[0])<<0)
 643         tmp[4] = uint64(a[0])*(uint64(b[4])<<0) +
 644                 uint64(a[1])*(uint64(b[3])<<1) +
 645                 uint64(a[2])*(uint64(b[2])<<0) +
 646                 uint64(a[3])*(uint64(b[1])<<1) +
 647                 uint64(a[4])*(uint64(b[0])<<0)
 648         tmp[5] = uint64(a[0])*(uint64(b[5])<<0) +
 649                 uint64(a[1])*(uint64(b[4])<<0) +
 650                 uint64(a[2])*(uint64(b[3])<<0) +
 651                 uint64(a[3])*(uint64(b[2])<<0) +
 652                 uint64(a[4])*(uint64(b[1])<<0) +
 653                 uint64(a[5])*(uint64(b[0])<<0)
 654         tmp[6] = uint64(a[0])*(uint64(b[6])<<0) +
 655                 uint64(a[1])*(uint64(b[5])<<1) +
 656                 uint64(a[2])*(uint64(b[4])<<0) +
 657                 uint64(a[3])*(uint64(b[3])<<1) +
 658                 uint64(a[4])*(uint64(b[2])<<0) +
 659                 uint64(a[5])*(uint64(b[1])<<1) +
 660                 uint64(a[6])*(uint64(b[0])<<0)
 661         tmp[7] = uint64(a[0])*(uint64(b[7])<<0) +
 662                 uint64(a[1])*(uint64(b[6])<<0) +
 663                 uint64(a[2])*(uint64(b[5])<<0) +
 664                 uint64(a[3])*(uint64(b[4])<<0) +
 665                 uint64(a[4])*(uint64(b[3])<<0) +
 666                 uint64(a[5])*(uint64(b[2])<<0) +
 667                 uint64(a[6])*(uint64(b[1])<<0) +
 668                 uint64(a[7])*(uint64(b[0])<<0)
 669         // tmp[8] has the greatest value but doesn't overflow. See logic in
 670         // p256Square.
 671         tmp[8] = uint64(a[0])*(uint64(b[8])<<0) +
 672                 uint64(a[1])*(uint64(b[7])<<1) +
 673                 uint64(a[2])*(uint64(b[6])<<0) +
 674                 uint64(a[3])*(uint64(b[5])<<1) +
 675                 uint64(a[4])*(uint64(b[4])<<0) +
 676                 uint64(a[5])*(uint64(b[3])<<1) +
 677                 uint64(a[6])*(uint64(b[2])<<0) +
 678                 uint64(a[7])*(uint64(b[1])<<1) +
 679                 uint64(a[8])*(uint64(b[0])<<0)
 680         tmp[9] = uint64(a[1])*(uint64(b[8])<<0) +
 681                 uint64(a[2])*(uint64(b[7])<<0) +
 682                 uint64(a[3])*(uint64(b[6])<<0) +
 683                 uint64(a[4])*(uint64(b[5])<<0) +
 684                 uint64(a[5])*(uint64(b[4])<<0) +
 685                 uint64(a[6])*(uint64(b[3])<<0) +
 686                 uint64(a[7])*(uint64(b[2])<<0) +
 687                 uint64(a[8])*(uint64(b[1])<<0)
 688         tmp[10] = uint64(a[2])*(uint64(b[8])<<0) +
 689                 uint64(a[3])*(uint64(b[7])<<1) +
 690                 uint64(a[4])*(uint64(b[6])<<0) +
 691                 uint64(a[5])*(uint64(b[5])<<1) +
 692                 uint64(a[6])*(uint64(b[4])<<0) +
 693                 uint64(a[7])*(uint64(b[3])<<1) +
 694                 uint64(a[8])*(uint64(b[2])<<0)
 695         tmp[11] = uint64(a[3])*(uint64(b[8])<<0) +
 696                 uint64(a[4])*(uint64(b[7])<<0) +
 697                 uint64(a[5])*(uint64(b[6])<<0) +
 698                 uint64(a[6])*(uint64(b[5])<<0) +
 699                 uint64(a[7])*(uint64(b[4])<<0) +
 700                 uint64(a[8])*(uint64(b[3])<<0)
 701         tmp[12] = uint64(a[4])*(uint64(b[8])<<0) +
 702                 uint64(a[5])*(uint64(b[7])<<1) +
 703                 uint64(a[6])*(uint64(b[6])<<0) +
 704                 uint64(a[7])*(uint64(b[5])<<1) +
 705                 uint64(a[8])*(uint64(b[4])<<0)
 706         tmp[13] = uint64(a[5])*(uint64(b[8])<<0) +
 707                 uint64(a[6])*(uint64(b[7])<<0) +
 708                 uint64(a[7])*(uint64(b[6])<<0) +
 709                 uint64(a[8])*(uint64(b[5])<<0)
 710         tmp[14] = uint64(a[6])*(uint64(b[8])<<0) +
 711                 uint64(a[7])*(uint64(b[7])<<1) +
 712                 uint64(a[8])*(uint64(b[6])<<0)
 713         tmp[15] = uint64(a[7])*(uint64(b[8])<<0) +
 714                 uint64(a[8])*(uint64(b[7])<<0)
 715         tmp[16] = uint64(a[8]) * (uint64(b[8]) << 0)
 716         sm2P256ReduceDegree(c, &tmp)
 717 }
 718
 719 // b = a * a
 720 func sm2P256Square(b, a *sm2P256FieldElement) {
 721         var tmp sm2P256LargeFieldElement
 722
 723         tmp[0] = uint64(a[0]) * uint64(a[0])
 724         tmp[1] = uint64(a[0]) * (uint64(a[1]) << 1)
 725         tmp[2] = uint64(a[0])*(uint64(a[2])<<1) +
 726                 uint64(a[1])*(uint64(a[1])<<1)
 727         tmp[3] = uint64(a[0])*(uint64(a[3])<<1) +
 728                 uint64(a[1])*(uint64(a[2])<<1)
 729         tmp[4] = uint64(a[0])*(uint64(a[4])<<1) +
 730                 uint64(a[1])*(uint64(a[3])<<2) +
 731                 uint64(a[2])*uint64(a[2])
 732         tmp[5] = uint64(a[0])*(uint64(a[5])<<1) +
 733                 uint64(a[1])*(uint64(a[4])<<1) +
 734                 uint64(a[2])*(uint64(a[3])<<1)
 735         tmp[6] = uint64(a[0])*(uint64(a[6])<<1) +
 736                 uint64(a[1])*(uint64(a[5])<<2) +
 737                 uint64(a[2])*(uint64(a[4])<<1) +
 738                 uint64(a[3])*(uint64(a[3])<<1)
 739         tmp[7] = uint64(a[0])*(uint64(a[7])<<1) +
 740                 uint64(a[1])*(uint64(a[6])<<1) +
 741                 uint64(a[2])*(uint64(a[5])<<1) +
 742                 uint64(a[3])*(uint64(a[4])<<1)
 743         // tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
 744         // which is < 2**64 as required.
 745         tmp[8] = uint64(a[0])*(uint64(a[8])<<1) +
 746                 uint64(a[1])*(uint64(a[7])<<2) +
 747                 uint64(a[2])*(uint64(a[6])<<1) +
 748                 uint64(a[3])*(uint64(a[5])<<2) +
 749                 uint64(a[4])*uint64(a[4])
 750         tmp[9] = uint64(a[1])*(uint64(a[8])<<1) +
 751                 uint64(a[2])*(uint64(a[7])<<1) +
 752                 uint64(a[3])*(uint64(a[6])<<1) +
 753                 uint64(a[4])*(uint64(a[5])<<1)
 754         tmp[10] = uint64(a[2])*(uint64(a[8])<<1) +
 755                 uint64(a[3])*(uint64(a[7])<<2) +
 756                 uint64(a[4])*(uint64(a[6])<<1) +
 757                 uint64(a[5])*(uint64(a[5])<<1)
 758         tmp[11] = uint64(a[3])*(uint64(a[8])<<1) +
 759                 uint64(a[4])*(uint64(a[7])<<1) +
 760                 uint64(a[5])*(uint64(a[6])<<1)
 761         tmp[12] = uint64(a[4])*(uint64(a[8])<<1) +
 762                 uint64(a[5])*(uint64(a[7])<<2) +
 763                 uint64(a[6])*uint64(a[6])
 764         tmp[13] = uint64(a[5])*(uint64(a[8])<<1) +
 765                 uint64(a[6])*(uint64(a[7])<<1)
 766         tmp[14] = uint64(a[6])*(uint64(a[8])<<1) +
 767                 uint64(a[7])*(uint64(a[7])<<1)
 768         tmp[15] = uint64(a[7]) * (uint64(a[8]) << 1)
 769         tmp[16] = uint64(a[8]) * uint64(a[8])
 770         sm2P256ReduceDegree(b, &tmp)
 771 }
 772
 773 // nonZeroToAllOnes returns:
 774 //   0xffffffff for 0 < x <= 2**31
 775 //   0 for x == 0 or x > 2**31.
 776 func nonZeroToAllOnes(x uint32) uint32 {
 777         return ((x - 1) >> 31) - 1
 778 }
 779
// sm2P256Carry is a lookup table of 8 rows of 9 limbs each, stored row by
// row, so that entry (k, j) lives at index k*9+j. sm2P256ReduceCarry uses it
// to fold a small carry k (0 <= k < 8) back into a field element.
//
// Row k holds k * (2**257 - 2*P) = k * (2**225 + 2**97 - 2**65 + 2) written in
// the alternating 29/28-bit limb layout, i.e. a value congruent to k * 2**257
// modulo P. Only limbs 0, 2, 3 and 7 of a row are ever non-zero.
var sm2P256Carry = [8 * 9]uint32{
	0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0x2, 0x0, 0x1FFFFF00, 0x7FF, 0x0, 0x0, 0x0, 0x2000000, 0x0,
	0x4, 0x0, 0x1FFFFE00, 0xFFF, 0x0, 0x0, 0x0, 0x4000000, 0x0,
	0x6, 0x0, 0x1FFFFD00, 0x17FF, 0x0, 0x0, 0x0, 0x6000000, 0x0,
	0x8, 0x0, 0x1FFFFC00, 0x1FFF, 0x0, 0x0, 0x0, 0x8000000, 0x0,
	0xA, 0x0, 0x1FFFFB00, 0x27FF, 0x0, 0x0, 0x0, 0xA000000, 0x0,
	0xC, 0x0, 0x1FFFFA00, 0x2FFF, 0x0, 0x0, 0x0, 0xC000000, 0x0,
	0xE, 0x0, 0x1FFFF900, 0x37FF, 0x0, 0x0, 0x0, 0xE000000, 0x0,
}
 790
 791 // carry < 2 ^ 3
 792 func sm2P256ReduceCarry(a *sm2P256FieldElement, carry uint32) {
 793         a[0] += sm2P256Carry[carry*9+0]
 794         a[2] += sm2P256Carry[carry*9+2]
 795         a[3] += sm2P256Carry[carry*9+3]
 796         a[7] += sm2P256Carry[carry*9+7]
 797 }
 798
// sm2P256ReduceDegree reduces the 17-limb, 64-bit-per-limb value b (as
// produced by sm2P256Mul and sm2P256Square) to the 9-limb field element a.
//
// It works in three phases:
//
//  1. The 64-bit limbs of b are split into 18 limbs of alternating 29 and 28
//     bits (tmp), propagating carries upwards.
//  2. The nine low limbs tmp[0..8] (257 bits in total) are cleared one at a
//     time. For a low limb x, the code adds x*2**64, subtracts x*2**96 and
//     x*2**224, and adds x*2**256 relative to that limb, then zeroes the limb
//     itself. Together this adds x*P, which leaves the value unchanged
//     modulo P. This has the shape of a Montgomery reduction with
//     R = 2**257. The nested ifs handle the borrows caused by the
//     subtractions.
//  3. The surviving limbs tmp[9..17] are repacked into the 29/28-bit layout of
//     a, and the final carry out of a[8] is folded back in with
//     sm2P256ReduceCarry.
//
// Original author's notes (translated): this code is really ugly, I am amazed
// at myself... You had better not change this code, or you will regret it
// badly.
func sm2P256ReduceDegree(a *sm2P256FieldElement, b *sm2P256LargeFieldElement) {
	var tmp [18]uint32
	var carry, x, xMask uint32

	// Phase 1: split b into limbs of the following widths.
	// tmp
	// 0  | 1  | 2  | 3  | 4  | 5  | 6  | 7  | 8  |  9 | 10 ...
	// 29 | 28 | 29 | 28 | 29 | 28 | 29 | 28 | 29 | 28 | 29 ...
	tmp[0] = uint32(b[0]) & bottom29Bits
	// tmp[1] collects the bits of b[0] above bit 29 plus the low 28 bits of b[1].
	tmp[1] = uint32(b[0]) >> 29
	tmp[1] |= (uint32(b[0]>>32) << 3) & bottom28Bits
	tmp[1] += uint32(b[1]) & bottom28Bits
	carry = tmp[1] >> 28
	tmp[1] &= bottom28Bits
	// Each pass handles one even (29-bit) limb and then one odd (28-bit) limb.
	// Limb i receives the top bits of b[i-2], the middle bits of b[i-1], the
	// low bits of b[i] and the carry from limb i-1.
	for i := 2; i < 17; i++ {
		tmp[i] = (uint32(b[i-2] >> 32)) >> 25
		tmp[i] += (uint32(b[i-1])) >> 28
		tmp[i] += (uint32(b[i-1]>>32) << 4) & bottom29Bits
		tmp[i] += uint32(b[i]) & bottom29Bits
		tmp[i] += carry
		carry = tmp[i] >> 29
		tmp[i] &= bottom29Bits

		i++
		if i == 17 {
			break
		}
		tmp[i] = uint32(b[i-2]>>32) >> 25
		tmp[i] += uint32(b[i-1]) >> 29
		tmp[i] += ((uint32(b[i-1] >> 32)) << 3) & bottom28Bits
		tmp[i] += uint32(b[i]) & bottom28Bits
		tmp[i] += carry
		carry = tmp[i] >> 28
		tmp[i] &= bottom28Bits
	}
	// The top limb takes whatever is left of b[15] and b[16]; it is not masked.
	tmp[17] = uint32(b[15]>>32) >> 25
	tmp[17] += uint32(b[16]) >> 29
	tmp[17] += uint32(b[16]>>32) << 3
	tmp[17] += carry

	// Phase 2: clear tmp[0..8], two limbs (one even, one odd) per pass. The
	// loop exits from the middle once the even limb tmp[8] has been handled.
	for i := 0; ; i += 2 {

		// Even limb (29 bits): push any excess into the next limb, then
		// eliminate the remaining value x.
		tmp[i+1] += tmp[i] >> 29
		x = tmp[i] & bottom29Bits
		tmp[i] = 0
		if x > 0 {
			// set4 / set7 record that a borrow was taken from limb i+4 / i+7.
			set4 := uint32(0)
			set7 := uint32(0)
			// x is non-zero and below 2**29 here, so xMask is 0xffffffff.
			xMask = nonZeroToAllOnes(x)
			// + x * 2**64 (relative to limb i), split across limbs i+2 and i+3.
			tmp[i+2] += (x << 7) & bottom29Bits
			tmp[i+3] += x >> 22
			// - x * 2**96, split across limbs i+3 and i+4, borrowing if needed.
			if tmp[i+3] < 0x10000000 {
				set4 = 1
				tmp[i+3] += 0x10000000 & xMask
				tmp[i+3] -= (x << 10) & bottom28Bits
			} else {
				tmp[i+3] -= (x << 10) & bottom28Bits
			}
			if tmp[i+4] < 0x20000000 {
				tmp[i+4] += 0x20000000 & xMask
				tmp[i+4] -= set4 // borrow
				tmp[i+4] -= x >> 18
				if tmp[i+5] < 0x10000000 {
					tmp[i+5] += 0x10000000 & xMask
					tmp[i+5] -= 1 // borrow
					if tmp[i+6] < 0x20000000 {
						set7 = 1
						tmp[i+6] += 0x20000000 & xMask
						tmp[i+6] -= 1 // borrow
					} else {
						tmp[i+6] -= 1 // borrow
					}
				} else {
					tmp[i+5] -= 1
				}
			} else {
				tmp[i+4] -= set4 // borrow
				tmp[i+4] -= x >> 18
			}
			// - x * 2**224 (limbs i+7, i+8) and + x * 2**256 (limbs i+8, i+9).
			if tmp[i+7] < 0x10000000 {
				tmp[i+7] += 0x10000000 & xMask
				tmp[i+7] -= set7
				tmp[i+7] -= (x << 24) & bottom28Bits
				tmp[i+8] += (x << 28) & bottom29Bits
				if tmp[i+8] < 0x20000000 {
					tmp[i+8] += 0x20000000 & xMask
					tmp[i+8] -= 1
					tmp[i+8] -= x >> 4
					tmp[i+9] += ((x >> 1) - 1) & xMask
				} else {
					tmp[i+8] -= 1
					tmp[i+8] -= x >> 4
					tmp[i+9] += (x >> 1) & xMask
				}
			} else {
				tmp[i+7] -= set7 // borrow
				tmp[i+7] -= (x << 24) & bottom28Bits
				tmp[i+8] += (x << 28) & bottom29Bits
				if tmp[i+8] < 0x20000000 {
					tmp[i+8] += 0x20000000 & xMask
					tmp[i+8] -= x >> 4
					tmp[i+9] += ((x >> 1) - 1) & xMask
				} else {
					tmp[i+8] -= x >> 4
					tmp[i+9] += (x >> 1) & xMask
				}
			}

		}

		// tmp[8] was the last low limb; tmp[9] onwards is the result.
		if i+1 == 9 {
			break
		}

		// Odd limb (28 bits): same elimination with the shifts adjusted for
		// the different limb alignment.
		tmp[i+2] += tmp[i+1] >> 28
		x = tmp[i+1] & bottom28Bits
		tmp[i+1] = 0
		if x > 0 {
			// set5 / set8 / set9 record that a borrow was taken from limb
			// i+5 / i+8 / i+9.
			set5 := uint32(0)
			set8 := uint32(0)
			set9 := uint32(0)
			// x is non-zero and below 2**28 here, so xMask is 0xffffffff.
			xMask = nonZeroToAllOnes(x)
			// + x * 2**64 (relative to limb i+1), split across limbs i+3 and i+4.
			tmp[i+3] += (x << 7) & bottom28Bits
			tmp[i+4] += x >> 21
			// - x * 2**96, split across limbs i+4 and i+5, borrowing if needed.
			if tmp[i+4] < 0x20000000 {
				set5 = 1
				tmp[i+4] += 0x20000000 & xMask
				tmp[i+4] -= (x << 11) & bottom29Bits
			} else {
				tmp[i+4] -= (x << 11) & bottom29Bits
			}
			if tmp[i+5] < 0x10000000 {
				tmp[i+5] += 0x10000000 & xMask
				tmp[i+5] -= set5 // borrow
				tmp[i+5] -= x >> 18
				if tmp[i+6] < 0x20000000 {
					tmp[i+6] += 0x20000000 & xMask
					tmp[i+6] -= 1 // borrow
					if tmp[i+7] < 0x10000000 {
						set8 = 1
						tmp[i+7] += 0x10000000 & xMask
						tmp[i+7] -= 1 // borrow
					} else {
						tmp[i+7] -= 1 // borrow
					}
				} else {
					tmp[i+6] -= 1 // borrow
				}
			} else {
				tmp[i+5] -= set5 // borrow
				tmp[i+5] -= x >> 18
			}
			// - x * 2**224, split across limbs i+8 and i+9.
			if tmp[i+8] < 0x20000000 {
				set9 = 1
				tmp[i+8] += 0x20000000 & xMask
				tmp[i+8] -= set8
				tmp[i+8] -= (x << 25) & bottom29Bits
			} else {
				tmp[i+8] -= set8
				tmp[i+8] -= (x << 25) & bottom29Bits
			}
			// + x * 2**256 lands exactly on limb i+10; one unit is withheld
			// when limb i+9 had to borrow from it.
			if tmp[i+9] < 0x10000000 {
				tmp[i+9] += 0x10000000 & xMask
				tmp[i+9] -= set9 // borrow
				tmp[i+9] -= x >> 4
				tmp[i+10] += (x - 1) & xMask
			} else {
				tmp[i+9] -= set9 // borrow
				tmp[i+9] -= x >> 4
				tmp[i+10] += x & xMask
			}
		}
	}

	// Phase 3: repack tmp[9..17] into a. tmp[9] is a 28-bit limb but a[0] must
	// hold 29 bits, so each even output limb borrows the lowest bit of the
	// following tmp limb and each odd output limb drops that bit.
	carry = uint32(0)
	for i := 0; i < 8; i++ {
		a[i] = tmp[i+9]
		a[i] += carry
		a[i] += (tmp[i+10] << 28) & bottom29Bits
		carry = a[i] >> 29
		a[i] &= bottom29Bits

		i++
		a[i] = tmp[i+9] >> 1
		a[i] += carry
		carry = a[i] >> 28
		a[i] &= bottom28Bits
	}
	a[8] = tmp[17]
	a[8] += carry
	carry = a[8] >> 29
	a[8] &= bottom29Bits
	// Fold the bits that overflowed past limb 8 back into the low limbs.
	sm2P256ReduceCarry(a, carry)
}
 994
 995 // b = a
 996 func sm2P256Dup(b, a *sm2P256FieldElement) {
 997         *b = *a
 998 }
 999
1000 // X = a * R mod P
1001 func sm2P256FromBig(X *sm2P256FieldElement, a *big.Int) {
1002         x := new(big.Int).Lsh(a, 257)
1003         x.Mod(x, sm2P256.P)
1004         for i := 0; i < 9; i++ {
1005                 if bits := x.Bits(); len(bits) > 0 {
1006                         X[i] = uint32(bits[0]) & bottom29Bits
1007                 } else {
1008                         X[i] = 0
1009                 }
1010                 x.Rsh(x, 29)
1011                 i++
1012                 if i == 9 {
1013                         break
1014                 }
1015                 if bits := x.Bits(); len(bits) > 0 {
1016                         X[i] = uint32(bits[0]) & bottom28Bits
1017                 } else {
1018                         X[i] = 0
1019                 }
1020                 x.Rsh(x, 28)
1021         }
1022 }
1023
1024 // X = r * R mod P
1025 // r = X * R' mod P
1026 func sm2P256ToBig(X *sm2P256FieldElement) *big.Int {
1027         r, tm := new(big.Int), new(big.Int)
1028         r.SetInt64(int64(X[8]))
1029         for i := 7; i >= 0; i-- {
1030                 if (i & 1) == 0 {
1031                         r.Lsh(r, 29)
1032                 } else {
1033                         r.Lsh(r, 28)
1034                 }
1035                 tm.SetInt64(int64(X[i]))
1036                 r.Add(r, tm)
1037         }
1038         r.Mul(r, sm2P256.RInverse)
1039         r.Mod(r, sm2P256.P)
1040         return r
1041 }