OSDN Git Service

Hulk did something
[bytom/vapor.git] / crypto / sm2 / p256.go
1 package sm2
2
3 import (
4         "crypto/elliptic"
5         "math/big"
6         "sync"
7 )
8
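/** A fast SM2 implementation that borrows the optimization techniques of the standard library's P-256 code.
 * The standard library's p256 implementation is rather hard to follow, and so is this SM2 version; anyone interested is welcome to study it. A mythical beast stands guard below...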
11  *
12  * ━━━━━━animal━━━━━━
13  *    ┏┓   ┏┓
14  *   ┏┛┻━━━┛┻┓
15  *   ┃       ┃
16  *   ┃   ━   ┃
17  *   ┃ ┳┛ ┗┳ ┃
18  *   ┃       ┃
19  *   ┃   ┻   ┃
20  *   ┃       ┃
21  *   ┗━┓   ┏━┛
22  *    ┃   ┃
23  *    ┃   ┃
24  *    ┃   ┗━━━┓
25  *          ┃     ┣┓
26  *     ┃     ┏┛
27  *    ┗┓┓┏━┳┓┏┛
28  *    ┃┫┫ ┃┫┫
29  *    ┗┻┛ ┗┻┛
30  *
31  * ━━━━━Kawaii ━━━━━━
32  */
33
// sm2P256FieldElement is a field element stored as nine uint32 limbs. The
// add/sub helpers in this file mask even-indexed limbs to 29 bits and
// odd-indexed limbs to 28 bits.
type sm2P256FieldElement [9]uint32

// sm2P256LargeFieldElement holds the 17 unreduced 64-bit column sums produced
// while multiplying two sm2P256FieldElement values.
type sm2P256LargeFieldElement [17]uint64

// sm2P256Curve bundles the generic curve parameters with limb-form copies of
// the values the fast arithmetic needs.
type sm2P256Curve struct {
	// RInverse is loaded from a hex constant during initialisation.
	// NOTE(review): presumably the inverse of the scaling factor used by the
	// limb representation; its consumers are outside this view.
	RInverse *big.Int
	*elliptic.CurveParams
	a  sm2P256FieldElement // curve coefficient a in limb form
	b  sm2P256FieldElement // curve coefficient b in limb form
	gx sm2P256FieldElement // base point x coordinate in limb form
	gy sm2P256FieldElement // base point y coordinate in limb form
}

var (
	// initonce guards the one-time setup of sm2P256.
	initonce sync.Once
	// sm2P256 is the package-wide curve instance filled in by initP256Sm2.
	sm2P256 sm2P256Curve
)

// Masks selecting the low 28 and 29 bits of a limb.
const (
	bottom28Bits = 0xFFFFFFF
	bottom29Bits = 0x1FFFFFFF
)
50
51 func initP256Sm2() {
52         sm2P256.CurveParams = &elliptic.CurveParams{Name: "SM2-P-256"} // sm2
53         A, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFC", 16)
54         //SM2椭        椭 圆 曲 线 公 钥 密 码 算 法 推 荐 曲 线 参 数
55         sm2P256.P, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
56         sm2P256.N, _ = new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
57         sm2P256.B, _ = new(big.Int).SetString("28E9FA9E9D9F5E344D5A9E4BCF6509A7F39789F515AB8F92DDBCBD414D940E93", 16)
58         sm2P256.Gx, _ = new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16)
59         sm2P256.Gy, _ = new(big.Int).SetString("BC3736A2F4F6779C59BDCEE36B692153D0A9877CC62A474002DF32E52139F0A0", 16)
60         sm2P256.RInverse, _ = new(big.Int).SetString("7ffffffd80000002fffffffe000000017ffffffe800000037ffffffc80000002", 16)
61         sm2P256.BitSize = 256
62         sm2P256FromBig(&sm2P256.a, A)
63         sm2P256FromBig(&sm2P256.gx, sm2P256.Gx)
64         sm2P256FromBig(&sm2P256.gy, sm2P256.Gy)
65         sm2P256FromBig(&sm2P256.b, sm2P256.B)
66 }
67
// P256Sm2 returns the package's SM2 curve as an elliptic.Curve. The curve
// state is set up by initP256Sm2, which runs at most once via initonce.
func P256Sm2() elliptic.Curve {
	initonce.Do(initP256Sm2)
	return sm2P256
}
72
// Params returns the curve parameters stored in the package-level sm2P256
// value (it does not read them from the receiver).
func (curve sm2P256Curve) Params() *elliptic.CurveParams {
	return sm2P256.CurveParams
}
76
77 // y^2 = x^3 + ax + b
78 func (curve sm2P256Curve) IsOnCurve(X, Y *big.Int) bool {
79         var a, x, y, y2, x3 sm2P256FieldElement
80
81         sm2P256FromBig(&x, X)
82         sm2P256FromBig(&y, Y)
83
84         sm2P256Square(&x3, &x)       // x3 = x ^ 2
85         sm2P256Mul(&x3, &x3, &x)     // x3 = x ^ 2 * x
86         sm2P256Mul(&a, &curve.a, &x) // a = a * x
87         sm2P256Add(&x3, &x3, &a)
88         sm2P256Add(&x3, &x3, &curve.b)
89
90         sm2P256Square(&y2, &y) // y2 = y ^ 2
91         return sm2P256ToBig(&x3).Cmp(sm2P256ToBig(&y2)) == 0
92 }
93
// zForAffine returns the Z coordinate to pair with the affine point (x, y):
// 0 when both coordinates are zero, and 1 otherwise. A fresh big.Int is
// returned on every call.
func zForAffine(x, y *big.Int) *big.Int {
	if x.Sign() == 0 && y.Sign() == 0 {
		return new(big.Int)
	}
	return big.NewInt(1)
}
101
102 func (curve sm2P256Curve) Add(x1, y1, x2, y2 *big.Int) (*big.Int, *big.Int) {
103         var X1, Y1, Z1, X2, Y2, Z2, X3, Y3, Z3 sm2P256FieldElement
104
105         z1 := zForAffine(x1, y1)
106         z2 := zForAffine(x2, y2)
107         sm2P256FromBig(&X1, x1)
108         sm2P256FromBig(&Y1, y1)
109         sm2P256FromBig(&Z1, z1)
110         sm2P256FromBig(&X2, x2)
111         sm2P256FromBig(&Y2, y2)
112         sm2P256FromBig(&Z2, z2)
113         sm2P256PointAdd(&X1, &Y1, &Z1, &X2, &Y2, &Z2, &X3, &Y3, &Z3)
114         return sm2P256ToAffine(&X3, &Y3, &Z3)
115 }
116
117 func (curve sm2P256Curve) Double(x1, y1 *big.Int) (*big.Int, *big.Int) {
118         var X1, Y1, Z1 sm2P256FieldElement
119
120         z1 := zForAffine(x1, y1)
121         sm2P256FromBig(&X1, x1)
122         sm2P256FromBig(&Y1, y1)
123         sm2P256FromBig(&Z1, z1)
124         sm2P256PointDouble(&X1, &Y1, &Z1, &X1, &Y1, &Z1)
125         return sm2P256ToAffine(&X1, &Y1, &Z1)
126 }
127
128 func (curve sm2P256Curve) ScalarMult(x1, y1 *big.Int, k []byte) (*big.Int, *big.Int) {
129         var scalarReversed [32]byte
130         var X, Y, Z, X1, Y1 sm2P256FieldElement
131
132         sm2P256FromBig(&X1, x1)
133         sm2P256FromBig(&Y1, y1)
134         sm2P256GetScalar(&scalarReversed, k)
135         sm2P256ScalarMult(&X, &Y, &Z, &X1, &Y1, &scalarReversed)
136         return sm2P256ToAffine(&X, &Y, &Z)
137 }
138
139 func (curve sm2P256Curve) ScalarBaseMult(k []byte) (*big.Int, *big.Int) {
140         var scalarReversed [32]byte
141         var X, Y, Z sm2P256FieldElement
142
143         sm2P256GetScalar(&scalarReversed, k)
144         sm2P256ScalarBaseMult(&X, &Y, &Z, &scalarReversed)
145         return sm2P256ToAffine(&X, &Y, &Z)
146 }
147
// sm2P256Precomputed is the lookup table read by sm2P256ScalarBaseMult through
// sm2P256SelectAffinePoint. It consists of two consecutive sub-tables (offsets
// 0 and 30*9), each holding 15 affine points stored as 9 limbs of x followed
// by 9 limbs of y; table index 0 (all selector bits clear) is implicit and not
// stored.
// NOTE(review): the entries are presumably fixed combinations of multiples of
// the base point matching the bit positions combined in
// sm2P256ScalarBaseMult — confirm against the table generator.
var sm2P256Precomputed = [9 * 2 * 15 * 2]uint32{
	0x830053d, 0x328990f, 0x6c04fe1, 0xc0f72e5, 0x1e19f3c, 0x666b093, 0x175a87b, 0xec38276, 0x222cf4b,
	0x185a1bba, 0x354e593, 0x1295fac1, 0xf2bc469, 0x47c60fa, 0xc19b8a9, 0xf63533e, 0x903ae6b, 0xc79acba,
	0x15b061a4, 0x33e020b, 0xdffb34b, 0xfcf2c8, 0x16582e08, 0x262f203, 0xfb34381, 0xa55452, 0x604f0ff,
	0x41f1f90, 0xd64ced2, 0xee377bf, 0x75f05f0, 0x189467ae, 0xe2244e, 0x1e7700e8, 0x3fbc464, 0x9612d2e,
	0x1341b3b8, 0xee84e23, 0x1edfa5b4, 0x14e6030, 0x19e87be9, 0x92f533c, 0x1665d96c, 0x226653e, 0xa238d3e,
	0xf5c62c, 0x95bb7a, 0x1f0e5a41, 0x28789c3, 0x1f251d23, 0x8726609, 0xe918910, 0x8096848, 0xf63d028,
	0x152296a1, 0x9f561a8, 0x14d376fb, 0x898788a, 0x61a95fb, 0xa59466d, 0x159a003d, 0x1ad1698, 0x93cca08,
	0x1b314662, 0x706e006, 0x11ce1e30, 0x97b710, 0x172fbc0d, 0x8f50158, 0x11c7ffe7, 0xd182cce, 0xc6ad9e8,
	0x12ea31b2, 0xc4e4f38, 0x175b0d96, 0xec06337, 0x75a9c12, 0xb001fdf, 0x93e82f5, 0x34607de, 0xb8035ed,
	0x17f97924, 0x75cf9e6, 0xdceaedd, 0x2529924, 0x1a10c5ff, 0xb1a54dc, 0x19464d8, 0x2d1997, 0xde6a110,
	0x1e276ee5, 0x95c510c, 0x1aca7c7a, 0xfe48aca, 0x121ad4d9, 0xe4132c6, 0x8239b9d, 0x40ea9cd, 0x816c7b,
	0x632d7a4, 0xa679813, 0x5911fcf, 0x82b0f7c, 0x57b0ad5, 0xbef65, 0xd541365, 0x7f9921f, 0xc62e7a,
	0x3f4b32d, 0x58e50e1, 0x6427aed, 0xdcdda67, 0xe8c2d3e, 0x6aa54a4, 0x18df4c35, 0x49a6a8e, 0x3cd3d0c,
	0xd7adf2, 0xcbca97, 0x1bda5f2d, 0x3258579, 0x606b1e6, 0x6fc1b5b, 0x1ac27317, 0x503ca16, 0xa677435,
	0x57bc73, 0x3992a42, 0xbab987b, 0xfab25eb, 0x128912a4, 0x90a1dc4, 0x1402d591, 0x9ffbcfc, 0xaa48856,
	0x7a7c2dc, 0xcefd08a, 0x1b29bda6, 0xa785641, 0x16462d8c, 0x76241b7, 0x79b6c3b, 0x204ae18, 0xf41212b,
	0x1f567a4d, 0xd6ce6db, 0xedf1784, 0x111df34, 0x85d7955, 0x55fc189, 0x1b7ae265, 0xf9281ac, 0xded7740,
	0xf19468b, 0x83763bb, 0x8ff7234, 0x3da7df8, 0x9590ac3, 0xdc96f2a, 0x16e44896, 0x7931009, 0x99d5acc,
	0x10f7b842, 0xaef5e84, 0xc0310d7, 0xdebac2c, 0x2a7b137, 0x4342344, 0x19633649, 0x3a10624, 0x4b4cb56,
	0x1d809c59, 0xac007f, 0x1f0f4bcd, 0xa1ab06e, 0xc5042cf, 0x82c0c77, 0x76c7563, 0x22c30f3, 0x3bf1568,
	0x7a895be, 0xfcca554, 0x12e90e4c, 0x7b4ab5f, 0x13aeb76b, 0x5887e2c, 0x1d7fe1e3, 0x908c8e3, 0x95800ee,
	0xb36bd54, 0xf08905d, 0x4e73ae8, 0xf5a7e48, 0xa67cb0, 0x50e1067, 0x1b944a0a, 0xf29c83a, 0xb23cfb9,
	0xbe1db1, 0x54de6e8, 0xd4707f2, 0x8ebcc2d, 0x2c77056, 0x1568ce4, 0x15fcc849, 0x4069712, 0xe2ed85f,
	0x2c5ff09, 0x42a6929, 0x628e7ea, 0xbd5b355, 0xaf0bd79, 0xaa03699, 0xdb99816, 0x4379cef, 0x81d57b,
	0x11237f01, 0xe2a820b, 0xfd53b95, 0x6beb5ee, 0x1aeb790c, 0xe470d53, 0x2c2cfee, 0x1c1d8d8, 0xa520fc4,
	0x1518e034, 0xa584dd4, 0x29e572b, 0xd4594fc, 0x141a8f6f, 0x8dfccf3, 0x5d20ba3, 0x2eb60c3, 0x9f16eb0,
	0x11cec356, 0xf039f84, 0x1b0990c1, 0xc91e526, 0x10b65bae, 0xf0616e8, 0x173fa3ff, 0xec8ccf9, 0xbe32790,
	0x11da3e79, 0xe2f35c7, 0x908875c, 0xdacf7bd, 0x538c165, 0x8d1487f, 0x7c31aed, 0x21af228, 0x7e1689d,
	0xdfc23ca, 0x24f15dc, 0x25ef3c4, 0x35248cd, 0x99a0f43, 0xa4b6ecc, 0xd066b3, 0x2481152, 0x37a7688,
	0x15a444b6, 0xb62300c, 0x4b841b, 0xa655e79, 0xd53226d, 0xbeb348a, 0x127f3c2, 0xb989247, 0x71a277d,
	0x19e9dfcb, 0xb8f92d0, 0xe2d226c, 0x390a8b0, 0x183cc462, 0x7bd8167, 0x1f32a552, 0x5e02db4, 0xa146ee9,
	0x1a003957, 0x1c95f61, 0x1eeec155, 0x26f811f, 0xf9596ba, 0x3082bfb, 0x96df083, 0x3e3a289, 0x7e2d8be,
	0x157a63e0, 0x99b8941, 0x1da7d345, 0xcc6cd0, 0x10beed9a, 0x48e83c0, 0x13aa2e25, 0x7cad710, 0x4029988,
	0x13dfa9dd, 0xb94f884, 0x1f4adfef, 0xb88543, 0x16f5f8dc, 0xa6a67f4, 0x14e274e2, 0x5e56cf4, 0x2f24ef,
	0x1e9ef967, 0xfe09bad, 0xfe079b3, 0xcc0ae9e, 0xb3edf6d, 0x3e961bc, 0x130d7831, 0x31043d6, 0xba986f9,
	0x1d28055, 0x65240ca, 0x4971fa3, 0x81b17f8, 0x11ec34a5, 0x8366ddc, 0x1471809, 0xfa5f1c6, 0xc911e15,
	0x8849491, 0xcf4c2e2, 0x14471b91, 0x39f75be, 0x445c21e, 0xf1585e9, 0x72cc11f, 0x4c79f0c, 0xe5522e1,
	0x1874c1ee, 0x4444211, 0x7914884, 0x3d1b133, 0x25ba3c, 0x4194f65, 0x1c0457ef, 0xac4899d, 0xe1fa66c,
	0x130a7918, 0x9b8d312, 0x4b1c5c8, 0x61ccac3, 0x18c8aa6f, 0xe93cb0a, 0xdccb12c, 0xde10825, 0x969737d,
	0xf58c0c3, 0x7cee6a9, 0xc2c329a, 0xc7f9ed9, 0x107b3981, 0x696a40e, 0x152847ff, 0x4d88754, 0xb141f47,
	0x5a16ffe, 0x3a7870a, 0x18667659, 0x3b72b03, 0xb1c9435, 0x9285394, 0xa00005a, 0x37506c, 0x2edc0bb,
	0x19afe392, 0xeb39cac, 0x177ef286, 0xdf87197, 0x19f844ed, 0x31fe8, 0x15f9bfd, 0x80dbec, 0x342e96e,
	0x497aced, 0xe88e909, 0x1f5fa9ba, 0x530a6ee, 0x1ef4e3f1, 0x69ffd12, 0x583006d, 0x2ecc9b1, 0x362db70,
	0x18c7bdc5, 0xf4bb3c5, 0x1c90b957, 0xf067c09, 0x9768f2b, 0xf73566a, 0x1939a900, 0x198c38a, 0x202a2a1,
	0x4bbf5a6, 0x4e265bc, 0x1f44b6e7, 0x185ca49, 0xa39e81b, 0x24aff5b, 0x4acc9c2, 0x638bdd3, 0xb65b2a8,
	0x6def8be, 0xb94537a, 0x10b81dee, 0xe00ec55, 0x2f2cdf7, 0xc20622d, 0x2d20f36, 0xe03c8c9, 0x898ea76,
	0x8e3921b, 0x8905bff, 0x1e94b6c8, 0xee7ad86, 0x154797f2, 0xa620863, 0x3fbd0d9, 0x1f3caab, 0x30c24bd,
	0x19d3892f, 0x59c17a2, 0x1ab4b0ae, 0xf8714ee, 0x90c4098, 0xa9c800d, 0x1910236b, 0xea808d3, 0x9ae2f31,
	0x1a15ad64, 0xa48c8d1, 0x184635a4, 0xb725ef1, 0x11921dcc, 0x3f866df, 0x16c27568, 0xbdf580a, 0xb08f55c,
	0x186ee1c, 0xb1627fa, 0x34e82f6, 0x933837e, 0xf311be5, 0xfedb03b, 0x167f72cd, 0xa5469c0, 0x9c82531,
	0xb92a24b, 0x14fdc8b, 0x141980d1, 0xbdc3a49, 0x7e02bb1, 0xaf4e6dd, 0x106d99e1, 0xd4616fc, 0x93c2717,
	0x1c0a0507, 0xc6d5fed, 0x9a03d8b, 0xa1d22b0, 0x127853e3, 0xc4ac6b8, 0x1a048cf7, 0x9afb72c, 0x65d485d,
	0x72d5998, 0xe9fa744, 0xe49e82c, 0x253cf80, 0x5f777ce, 0xa3799a5, 0x17270cbb, 0xc1d1ef0, 0xdf74977,
	0x114cb859, 0xfa8e037, 0xb8f3fe5, 0xc734cc6, 0x70d3d61, 0xeadac62, 0x12093dd0, 0x9add67d, 0x87200d6,
	0x175bcbb, 0xb29b49f, 0x1806b79c, 0x12fb61f, 0x170b3a10, 0x3aaf1cf, 0xa224085, 0x79d26af, 0x97759e2,
	0x92e19f1, 0xb32714d, 0x1f00d9f1, 0xc728619, 0x9e6f627, 0xe745e24, 0x18ea4ace, 0xfc60a41, 0x125f5b2,
	0xc3cf512, 0x39ed486, 0xf4d15fa, 0xf9167fd, 0x1c1f5dd5, 0xc21a53e, 0x1897930, 0x957a112, 0x21059a0,
	0x1f9e3ddc, 0xa4dfced, 0x8427f6f, 0x726fbe7, 0x1ea658f8, 0x2fdcd4c, 0x17e9b66f, 0xb2e7c2e, 0x39923bf,
	0x1bae104, 0x3973ce5, 0xc6f264c, 0x3511b84, 0x124195d7, 0x11996bd, 0x20be23d, 0xdc437c4, 0x4b4f16b,
	0x11902a0, 0x6c29cc9, 0x1d5ffbe6, 0xdb0b4c7, 0x10144c14, 0x2f2b719, 0x301189, 0x2343336, 0xa0bf2ac,
}
210
211 func sm2P256GetScalar(b *[32]byte, a []byte) {
212         var scalarBytes []byte
213
214         n := new(big.Int).SetBytes(a)
215         if n.Cmp(sm2P256.N) >= 0 {
216                 n.Mod(n, sm2P256.N)
217                 scalarBytes = n.Bytes()
218         } else {
219                 scalarBytes = a
220         }
221         for i, v := range scalarBytes {
222                 b[len(scalarBytes)-(1+i)] = v
223         }
224 }
225
226 func sm2P256PointAddMixed(xOut, yOut, zOut, x1, y1, z1, x2, y2 *sm2P256FieldElement) {
227         var z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp sm2P256FieldElement
228
229         sm2P256Square(&z1z1, z1)
230         sm2P256Add(&tmp, z1, z1)
231
232         sm2P256Mul(&u2, x2, &z1z1)
233         sm2P256Mul(&z1z1z1, z1, &z1z1)
234         sm2P256Mul(&s2, y2, &z1z1z1)
235         sm2P256Sub(&h, &u2, x1)
236         sm2P256Add(&i, &h, &h)
237         sm2P256Square(&i, &i)
238         sm2P256Mul(&j, &h, &i)
239         sm2P256Sub(&r, &s2, y1)
240         sm2P256Add(&r, &r, &r)
241         sm2P256Mul(&v, x1, &i)
242
243         sm2P256Mul(zOut, &tmp, &h)
244         sm2P256Square(&rr, &r)
245         sm2P256Sub(xOut, &rr, &j)
246         sm2P256Sub(xOut, xOut, &v)
247         sm2P256Sub(xOut, xOut, &v)
248
249         sm2P256Sub(&tmp, &v, xOut)
250         sm2P256Mul(yOut, &tmp, &r)
251         sm2P256Mul(&tmp, y1, &j)
252         sm2P256Sub(yOut, yOut, &tmp)
253         sm2P256Sub(yOut, yOut, &tmp)
254 }
255
256 // sm2P256CopyConditional sets out=in if mask = 0xffffffff in constant time.
257 //
258 // On entry: mask is either 0 or 0xffffffff.
259 func sm2P256CopyConditional(out, in *sm2P256FieldElement, mask uint32) {
260         for i := 0; i < 9; i++ {
261                 tmp := mask & (in[i] ^ out[i])
262                 out[i] ^= tmp
263         }
264 }
265
266 // sm2P256SelectAffinePoint sets {out_x,out_y} to the index'th entry of table.
267 // On entry: index < 16, table[0] must be zero.
268 func sm2P256SelectAffinePoint(xOut, yOut *sm2P256FieldElement, table []uint32, index uint32) {
269         for i := range xOut {
270                 xOut[i] = 0
271         }
272         for i := range yOut {
273                 yOut[i] = 0
274         }
275
276         for i := uint32(1); i < 16; i++ {
277                 mask := i ^ index
278                 mask |= mask >> 2
279                 mask |= mask >> 1
280                 mask &= 1
281                 mask--
282                 for j := range xOut {
283                         xOut[j] |= table[0] & mask
284                         table = table[1:]
285                 }
286                 for j := range yOut {
287                         yOut[j] |= table[0] & mask
288                         table = table[1:]
289                 }
290         }
291 }
292
293 // sm2P256SelectJacobianPoint sets {out_x,out_y,out_z} to the index'th entry of
294 // table.
295 // On entry: index < 16, table[0] must be zero.
296 func sm2P256SelectJacobianPoint(xOut, yOut, zOut *sm2P256FieldElement, table *[16][3]sm2P256FieldElement, index uint32) {
297         for i := range xOut {
298                 xOut[i] = 0
299         }
300         for i := range yOut {
301                 yOut[i] = 0
302         }
303         for i := range zOut {
304                 zOut[i] = 0
305         }
306
307         // The implicit value at index 0 is all zero. We don't need to perform that
308         // iteration of the loop because we already set out_* to zero.
309         for i := uint32(1); i < 16; i++ {
310                 mask := i ^ index
311                 mask |= mask >> 2
312                 mask |= mask >> 1
313                 mask &= 1
314                 mask--
315                 for j := range xOut {
316                         xOut[j] |= table[i][0][j] & mask
317                 }
318                 for j := range yOut {
319                         yOut[j] |= table[i][1][j] & mask
320                 }
321                 for j := range zOut {
322                         zOut[j] |= table[i][2][j] & mask
323                 }
324         }
325 }
326
// sm2P256GetBit returns bit number bit of scalar (0 or 1), counting from the
// least significant bit of scalar[0].
func sm2P256GetBit(scalar *[32]uint8, bit uint) uint32 {
	octet := scalar[bit>>3]
	shifted := octet >> (bit & 7)
	return uint32(shifted & 1)
}
331
332 // sm2P256ScalarBaseMult sets {xOut,yOut,zOut} = scalar*G where scalar is a
333 // little-endian number. Note that the value of scalar must be less than the
334 // order of the group.
335 func sm2P256ScalarBaseMult(xOut, yOut, zOut *sm2P256FieldElement, scalar *[32]uint8) {
336         nIsInfinityMask := ^uint32(0)
337         var px, py, tx, ty, tz sm2P256FieldElement
338         var pIsNoninfiniteMask, mask, tableOffset uint32
339
340         for i := range xOut {
341                 xOut[i] = 0
342         }
343         for i := range yOut {
344                 yOut[i] = 0
345         }
346         for i := range zOut {
347                 zOut[i] = 0
348         }
349
350         // The loop adds bits at positions 0, 64, 128 and 192, followed by
351         // positions 32,96,160 and 224 and does this 32 times.
352         for i := uint(0); i < 32; i++ {
353                 if i != 0 {
354                         sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
355                 }
356                 tableOffset = 0
357                 for j := uint(0); j <= 32; j += 32 {
358                         bit0 := sm2P256GetBit(scalar, 31-i+j)
359                         bit1 := sm2P256GetBit(scalar, 95-i+j)
360                         bit2 := sm2P256GetBit(scalar, 159-i+j)
361                         bit3 := sm2P256GetBit(scalar, 223-i+j)
362                         index := bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3)
363
364                         sm2P256SelectAffinePoint(&px, &py, sm2P256Precomputed[tableOffset:], index)
365                         tableOffset += 30 * 9
366
367                         // Since scalar is less than the order of the group, we know that
368                         // {xOut,yOut,zOut} != {px,py,1}, unless both are zero, which we handle
369                         // below.
370                         sm2P256PointAddMixed(&tx, &ty, &tz, xOut, yOut, zOut, &px, &py)
371                         // The result of pointAddMixed is incorrect if {xOut,yOut,zOut} is zero
372                         // (a.k.a.  the point at infinity). We handle that situation by
373                         // copying the point from the table.
374                         sm2P256CopyConditional(xOut, &px, nIsInfinityMask)
375                         sm2P256CopyConditional(yOut, &py, nIsInfinityMask)
376                         sm2P256CopyConditional(zOut, &sm2P256Factor[1], nIsInfinityMask)
377
378                         // Equally, the result is also wrong if the point from the table is
379                         // zero, which happens when the index is zero. We handle that by
380                         // only copying from {tx,ty,tz} to {xOut,yOut,zOut} if index != 0.
381                         pIsNoninfiniteMask = nonZeroToAllOnes(index)
382                         mask = pIsNoninfiniteMask & ^nIsInfinityMask
383                         sm2P256CopyConditional(xOut, &tx, mask)
384                         sm2P256CopyConditional(yOut, &ty, mask)
385                         sm2P256CopyConditional(zOut, &tz, mask)
386                         // If p was not zero, then n is now non-zero.
387                         nIsInfinityMask &^= pIsNoninfiniteMask
388                 }
389         }
390 }
391
392 func sm2P256ScalarMult(xOut, yOut, zOut, x, y *sm2P256FieldElement, scalar *[32]uint8) {
393         var precomp [16][3]sm2P256FieldElement
394         var px, py, pz, tx, ty, tz sm2P256FieldElement
395         var nIsInfinityMask, index, pIsNoninfiniteMask, mask uint32
396
397         // We precompute 0,1,2,... times {x,y}.
398         precomp[1][0] = *x
399         precomp[1][1] = *y
400         precomp[1][2] = sm2P256Factor[1]
401
402         for i := 2; i < 16; i += 2 {
403                 sm2P256PointDouble(&precomp[i][0], &precomp[i][1], &precomp[i][2], &precomp[i/2][0], &precomp[i/2][1], &precomp[i/2][2])
404                 sm2P256PointAddMixed(&precomp[i+1][0], &precomp[i+1][1], &precomp[i+1][2], &precomp[i][0], &precomp[i][1], &precomp[i][2], x, y)
405         }
406
407         for i := range xOut {
408                 xOut[i] = 0
409         }
410         for i := range yOut {
411                 yOut[i] = 0
412         }
413         for i := range zOut {
414                 zOut[i] = 0
415         }
416         nIsInfinityMask = ^uint32(0)
417
418         // We add in a window of four bits each iteration and do this 64 times.
419         for i := 0; i < 64; i++ {
420                 if i != 0 {
421                         sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
422                         sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
423                         sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
424                         sm2P256PointDouble(xOut, yOut, zOut, xOut, yOut, zOut)
425                 }
426
427                 index = uint32(scalar[31-i/2])
428                 if (i & 1) == 1 {
429                         index &= 15
430                 } else {
431                         index >>= 4
432                 }
433
434                 // See the comments in scalarBaseMult about handling infinities.
435                 sm2P256SelectJacobianPoint(&px, &py, &pz, &precomp, index)
436                 sm2P256PointAdd(xOut, yOut, zOut, &px, &py, &pz, &tx, &ty, &tz)
437                 sm2P256CopyConditional(xOut, &px, nIsInfinityMask)
438                 sm2P256CopyConditional(yOut, &py, nIsInfinityMask)
439                 sm2P256CopyConditional(zOut, &pz, nIsInfinityMask)
440
441                 pIsNoninfiniteMask = nonZeroToAllOnes(index)
442                 mask = pIsNoninfiniteMask & ^nIsInfinityMask
443                 sm2P256CopyConditional(xOut, &tx, mask)
444                 sm2P256CopyConditional(yOut, &ty, mask)
445                 sm2P256CopyConditional(zOut, &tz, mask)
446                 nIsInfinityMask &^= pIsNoninfiniteMask
447         }
448 }
449
450 func sm2P256PointToAffine(xOut, yOut, x, y, z *sm2P256FieldElement) {
451         var zInv, zInvSq sm2P256FieldElement
452
453         zz := sm2P256ToBig(z)
454         zz.ModInverse(zz, sm2P256.P)
455         sm2P256FromBig(&zInv, zz)
456
457         sm2P256Square(&zInvSq, &zInv)
458         sm2P256Mul(xOut, x, &zInvSq)
459         sm2P256Mul(&zInv, &zInv, &zInvSq)
460         sm2P256Mul(yOut, y, &zInv)
461 }
462
463 func sm2P256ToAffine(x, y, z *sm2P256FieldElement) (xOut, yOut *big.Int) {
464         var xx, yy sm2P256FieldElement
465
466         sm2P256PointToAffine(&xx, &yy, x, y, z)
467         return sm2P256ToBig(&xx), sm2P256ToBig(&yy)
468 }
469
470 var sm2P256Factor = []sm2P256FieldElement{
471         sm2P256FieldElement{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
472         sm2P256FieldElement{0x2, 0x0, 0x1FFFFF00, 0x7FF, 0x0, 0x0, 0x0, 0x2000000, 0x0},
473         sm2P256FieldElement{0x4, 0x0, 0x1FFFFE00, 0xFFF, 0x0, 0x0, 0x0, 0x4000000, 0x0},
474         sm2P256FieldElement{0x6, 0x0, 0x1FFFFD00, 0x17FF, 0x0, 0x0, 0x0, 0x6000000, 0x0},
475         sm2P256FieldElement{0x8, 0x0, 0x1FFFFC00, 0x1FFF, 0x0, 0x0, 0x0, 0x8000000, 0x0},
476         sm2P256FieldElement{0xA, 0x0, 0x1FFFFB00, 0x27FF, 0x0, 0x0, 0x0, 0xA000000, 0x0},
477         sm2P256FieldElement{0xC, 0x0, 0x1FFFFA00, 0x2FFF, 0x0, 0x0, 0x0, 0xC000000, 0x0},
478         sm2P256FieldElement{0xE, 0x0, 0x1FFFF900, 0x37FF, 0x0, 0x0, 0x0, 0xE000000, 0x0},
479         sm2P256FieldElement{0x10, 0x0, 0x1FFFF800, 0x3FFF, 0x0, 0x0, 0x0, 0x0, 0x01},
480 }
481
// sm2P256Scalar multiplies b in place by the small constant a, using the
// limb-form value stored at sm2P256Factor[a]. a must be a valid index into
// sm2P256Factor (0 through 8); any other value panics on the index.
func sm2P256Scalar(b *sm2P256FieldElement, a int) {
	sm2P256Mul(b, b, &sm2P256Factor[a])
}
485
486 // (x3, y3, z3) = (x1, y1, z1) + (x2, y2, z2)
487 func sm2P256PointAdd(x1, y1, z1, x2, y2, z2, x3, y3, z3 *sm2P256FieldElement) {
488         var u1, u2, z22, z12, z23, z13, s1, s2, h, h2, r, r2, tm sm2P256FieldElement
489
490         if sm2P256ToBig(z1).Sign() == 0 {
491                 sm2P256Dup(x3, x2)
492                 sm2P256Dup(y3, y2)
493                 sm2P256Dup(z3, z2)
494                 return
495         }
496
497         if sm2P256ToBig(z2).Sign() == 0 {
498                 sm2P256Dup(x3, x1)
499                 sm2P256Dup(y3, y1)
500                 sm2P256Dup(z3, z1)
501                 return
502         }
503
504         sm2P256Square(&z12, z1) // z12 = z1 ^ 2
505         sm2P256Square(&z22, z2) // z22 = z2 ^ 2
506
507         sm2P256Mul(&z13, &z12, z1) // z13 = z1 ^ 3
508         sm2P256Mul(&z23, &z22, z2) // z23 = z2 ^ 3
509
510         sm2P256Mul(&u1, x1, &z22) // u1 = x1 * z2 ^ 2
511         sm2P256Mul(&u2, x2, &z12) // u2 = x2 * z1 ^ 2
512
513         sm2P256Mul(&s1, y1, &z23) // s1 = y1 * z2 ^ 3
514         sm2P256Mul(&s2, y2, &z13) // s2 = y2 * z1 ^ 3
515
516         if sm2P256ToBig(&u1).Cmp(sm2P256ToBig(&u2)) == 0 &&
517                 sm2P256ToBig(&s1).Cmp(sm2P256ToBig(&s2)) == 0 {
518                 sm2P256PointDouble(x1, y1, z1, x1, y1, z1)
519         }
520
521         sm2P256Sub(&h, &u2, &u1) // h = u2 - u1
522         sm2P256Sub(&r, &s2, &s1) // r = s2 - s1
523
524         sm2P256Square(&r2, &r) // r2 = r ^ 2
525         sm2P256Square(&h2, &h) // h2 = h ^ 2
526
527         sm2P256Mul(&tm, &h2, &h) // tm = h ^ 3
528         sm2P256Sub(x3, &r2, &tm)
529         sm2P256Mul(&tm, &u1, &h2)
530         sm2P256Scalar(&tm, 2)   // tm = 2 * (u1 * h ^ 2)
531         sm2P256Sub(x3, x3, &tm) // x3 = r ^ 2 - h ^ 3 - 2 * u1 * h ^ 2
532
533         sm2P256Mul(&tm, &u1, &h2) // tm = u1 * h ^ 2
534         sm2P256Sub(&tm, &tm, x3)  // tm = u1 * h ^ 2 - x3
535         sm2P256Mul(y3, &r, &tm)
536         sm2P256Mul(&tm, &h2, &h)  // tm = h ^ 3
537         sm2P256Mul(&tm, &tm, &s1) // tm = s1 * h ^ 3
538         sm2P256Sub(y3, y3, &tm)   // y3 = r * (u1 * h ^ 2 - x3) - s1 * h ^ 3
539
540         sm2P256Mul(z3, z1, z2)
541         sm2P256Mul(z3, z3, &h) // z3 = z1 * z3 * h
542 }
543
544 func sm2P256PointDouble(x3, y3, z3, x, y, z *sm2P256FieldElement) {
545         var s, m, m2, x2, y2, z2, z4, y4, az4 sm2P256FieldElement
546
547         sm2P256Square(&x2, x) // x2 = x ^ 2
548         sm2P256Square(&y2, y) // y2 = y ^ 2
549         sm2P256Square(&z2, z) // z2 = z ^ 2
550
551         sm2P256Square(&z4, z)   // z4 = z ^ 2
552         sm2P256Mul(&z4, &z4, z) // z4 = z ^ 3
553         sm2P256Mul(&z4, &z4, z) // z4 = z ^ 4
554
555         sm2P256Square(&y4, y)   // y4 = y ^ 2
556         sm2P256Mul(&y4, &y4, y) // y4 = y ^ 3
557         sm2P256Mul(&y4, &y4, y) // y4 = y ^ 4
558         sm2P256Scalar(&y4, 8)   // y4 = 8 * y ^ 4
559
560         sm2P256Mul(&s, x, &y2)
561         sm2P256Scalar(&s, 4) // s = 4 * x * y ^ 2
562
563         sm2P256Dup(&m, &x2)
564         sm2P256Scalar(&m, 3)
565         sm2P256Mul(&az4, &sm2P256.a, &z4)
566         sm2P256Add(&m, &m, &az4) // m = 3 * x ^ 2 + a * z ^ 4
567
568         sm2P256Square(&m2, &m) // m2 = m ^ 2
569
570         sm2P256Add(z3, y, z)
571         sm2P256Square(z3, z3)
572         sm2P256Sub(z3, z3, &z2)
573         sm2P256Sub(z3, z3, &y2) // z' = (y + z) ^2 - z ^ 2 - y ^ 2
574
575         sm2P256Sub(x3, &m2, &s)
576         sm2P256Sub(x3, x3, &s) // x' = m2 - 2 * s
577
578         sm2P256Sub(y3, &s, x3)
579         sm2P256Mul(y3, y3, &m)
580         sm2P256Sub(y3, y3, &y4) // y' = m * (s - x') - 8 * y ^ 4
581 }
582
// sm2P256Zero31 is 0 mod p. sm2P256Sub adds it limb by limb to a[i] - b[i]
// before carry propagation.
// NOTE(review): presumably each limb is large enough to keep that difference
// from wrapping below zero for in-range inputs — confirm against the limb
// bounds.
var sm2P256Zero31 = sm2P256FieldElement{0x7FFFFFF8, 0x3FFFFFFC, 0x800003FC, 0x3FFFDFFC, 0x7FFFFFFC, 0x3FFFFFFC, 0x7FFFFFFC, 0x37FFFFFC, 0x7FFFFFFC}
585
586 // c = a + b
587 func sm2P256Add(c, a, b *sm2P256FieldElement) {
588         carry := uint32(0)
589         for i := 0; ; i++ {
590                 c[i] = a[i] + b[i]
591                 c[i] += carry
592                 carry = c[i] >> 29
593                 c[i] &= bottom29Bits
594                 i++
595                 if i == 9 {
596                         break
597                 }
598                 c[i] = a[i] + b[i]
599                 c[i] += carry
600                 carry = c[i] >> 28
601                 c[i] &= bottom28Bits
602         }
603         sm2P256ReduceCarry(c, carry)
604 }
605
606 // c = a - b
607 func sm2P256Sub(c, a, b *sm2P256FieldElement) {
608         var carry uint32
609
610         for i := 0; ; i++ {
611                 c[i] = a[i] - b[i]
612                 c[i] += sm2P256Zero31[i]
613                 c[i] += carry
614                 carry = c[i] >> 29
615                 c[i] &= bottom29Bits
616                 i++
617                 if i == 9 {
618                         break
619                 }
620                 c[i] = a[i] - b[i]
621                 c[i] += sm2P256Zero31[i]
622                 c[i] += carry
623                 carry = c[i] >> 28
624                 c[i] &= bottom28Bits
625         }
626         sm2P256ReduceCarry(c, carry)
627 }
628
// sm2P256Mul sets c = a * b.
//
// The product is formed schoolbook style: tmp[k] collects every limb product
// a[i]*b[j] with i+j == k as a 64-bit sum, and products where both i and j
// are odd are doubled (the <<1 terms). The 17 column sums are then passed to
// sm2P256ReduceDegree, which writes the result into c.
// NOTE(review): the doubling presumably compensates for odd limbs being 28
// rather than 29 bits wide — confirm against the limb layout.
//
// All of tmp is computed from a and b before c is handed to
// sm2P256ReduceDegree.
func sm2P256Mul(c, a, b *sm2P256FieldElement) {
	var tmp sm2P256LargeFieldElement

	tmp[0] = uint64(a[0]) * uint64(b[0])
	tmp[1] = uint64(a[0])*(uint64(b[1])<<0) +
		uint64(a[1])*(uint64(b[0])<<0)
	tmp[2] = uint64(a[0])*(uint64(b[2])<<0) +
		uint64(a[1])*(uint64(b[1])<<1) +
		uint64(a[2])*(uint64(b[0])<<0)
	tmp[3] = uint64(a[0])*(uint64(b[3])<<0) +
		uint64(a[1])*(uint64(b[2])<<0) +
		uint64(a[2])*(uint64(b[1])<<0) +
		uint64(a[3])*(uint64(b[0])<<0)
	tmp[4] = uint64(a[0])*(uint64(b[4])<<0) +
		uint64(a[1])*(uint64(b[3])<<1) +
		uint64(a[2])*(uint64(b[2])<<0) +
		uint64(a[3])*(uint64(b[1])<<1) +
		uint64(a[4])*(uint64(b[0])<<0)
	tmp[5] = uint64(a[0])*(uint64(b[5])<<0) +
		uint64(a[1])*(uint64(b[4])<<0) +
		uint64(a[2])*(uint64(b[3])<<0) +
		uint64(a[3])*(uint64(b[2])<<0) +
		uint64(a[4])*(uint64(b[1])<<0) +
		uint64(a[5])*(uint64(b[0])<<0)
	tmp[6] = uint64(a[0])*(uint64(b[6])<<0) +
		uint64(a[1])*(uint64(b[5])<<1) +
		uint64(a[2])*(uint64(b[4])<<0) +
		uint64(a[3])*(uint64(b[3])<<1) +
		uint64(a[4])*(uint64(b[2])<<0) +
		uint64(a[5])*(uint64(b[1])<<1) +
		uint64(a[6])*(uint64(b[0])<<0)
	tmp[7] = uint64(a[0])*(uint64(b[7])<<0) +
		uint64(a[1])*(uint64(b[6])<<0) +
		uint64(a[2])*(uint64(b[5])<<0) +
		uint64(a[3])*(uint64(b[4])<<0) +
		uint64(a[4])*(uint64(b[3])<<0) +
		uint64(a[5])*(uint64(b[2])<<0) +
		uint64(a[6])*(uint64(b[1])<<0) +
		uint64(a[7])*(uint64(b[0])<<0)
	// tmp[8] has the greatest value but doesn't overflow. See logic in
	// sm2P256Square.
	tmp[8] = uint64(a[0])*(uint64(b[8])<<0) +
		uint64(a[1])*(uint64(b[7])<<1) +
		uint64(a[2])*(uint64(b[6])<<0) +
		uint64(a[3])*(uint64(b[5])<<1) +
		uint64(a[4])*(uint64(b[4])<<0) +
		uint64(a[5])*(uint64(b[3])<<1) +
		uint64(a[6])*(uint64(b[2])<<0) +
		uint64(a[7])*(uint64(b[1])<<1) +
		uint64(a[8])*(uint64(b[0])<<0)
	tmp[9] = uint64(a[1])*(uint64(b[8])<<0) +
		uint64(a[2])*(uint64(b[7])<<0) +
		uint64(a[3])*(uint64(b[6])<<0) +
		uint64(a[4])*(uint64(b[5])<<0) +
		uint64(a[5])*(uint64(b[4])<<0) +
		uint64(a[6])*(uint64(b[3])<<0) +
		uint64(a[7])*(uint64(b[2])<<0) +
		uint64(a[8])*(uint64(b[1])<<0)
	tmp[10] = uint64(a[2])*(uint64(b[8])<<0) +
		uint64(a[3])*(uint64(b[7])<<1) +
		uint64(a[4])*(uint64(b[6])<<0) +
		uint64(a[5])*(uint64(b[5])<<1) +
		uint64(a[6])*(uint64(b[4])<<0) +
		uint64(a[7])*(uint64(b[3])<<1) +
		uint64(a[8])*(uint64(b[2])<<0)
	tmp[11] = uint64(a[3])*(uint64(b[8])<<0) +
		uint64(a[4])*(uint64(b[7])<<0) +
		uint64(a[5])*(uint64(b[6])<<0) +
		uint64(a[6])*(uint64(b[5])<<0) +
		uint64(a[7])*(uint64(b[4])<<0) +
		uint64(a[8])*(uint64(b[3])<<0)
	tmp[12] = uint64(a[4])*(uint64(b[8])<<0) +
		uint64(a[5])*(uint64(b[7])<<1) +
		uint64(a[6])*(uint64(b[6])<<0) +
		uint64(a[7])*(uint64(b[5])<<1) +
		uint64(a[8])*(uint64(b[4])<<0)
	tmp[13] = uint64(a[5])*(uint64(b[8])<<0) +
		uint64(a[6])*(uint64(b[7])<<0) +
		uint64(a[7])*(uint64(b[6])<<0) +
		uint64(a[8])*(uint64(b[5])<<0)
	tmp[14] = uint64(a[6])*(uint64(b[8])<<0) +
		uint64(a[7])*(uint64(b[7])<<1) +
		uint64(a[8])*(uint64(b[6])<<0)
	tmp[15] = uint64(a[7])*(uint64(b[8])<<0) +
		uint64(a[8])*(uint64(b[7])<<0)
	tmp[16] = uint64(a[8]) * (uint64(b[8]) << 0)
	sm2P256ReduceDegree(c, &tmp)
}
718
719 // b = a * a
720 func sm2P256Square(b, a *sm2P256FieldElement) {
721         var tmp sm2P256LargeFieldElement
722
723         tmp[0] = uint64(a[0]) * uint64(a[0])
724         tmp[1] = uint64(a[0]) * (uint64(a[1]) << 1)
725         tmp[2] = uint64(a[0])*(uint64(a[2])<<1) +
726                 uint64(a[1])*(uint64(a[1])<<1)
727         tmp[3] = uint64(a[0])*(uint64(a[3])<<1) +
728                 uint64(a[1])*(uint64(a[2])<<1)
729         tmp[4] = uint64(a[0])*(uint64(a[4])<<1) +
730                 uint64(a[1])*(uint64(a[3])<<2) +
731                 uint64(a[2])*uint64(a[2])
732         tmp[5] = uint64(a[0])*(uint64(a[5])<<1) +
733                 uint64(a[1])*(uint64(a[4])<<1) +
734                 uint64(a[2])*(uint64(a[3])<<1)
735         tmp[6] = uint64(a[0])*(uint64(a[6])<<1) +
736                 uint64(a[1])*(uint64(a[5])<<2) +
737                 uint64(a[2])*(uint64(a[4])<<1) +
738                 uint64(a[3])*(uint64(a[3])<<1)
739         tmp[7] = uint64(a[0])*(uint64(a[7])<<1) +
740                 uint64(a[1])*(uint64(a[6])<<1) +
741                 uint64(a[2])*(uint64(a[5])<<1) +
742                 uint64(a[3])*(uint64(a[4])<<1)
743         // tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
744         // which is < 2**64 as required.
745         tmp[8] = uint64(a[0])*(uint64(a[8])<<1) +
746                 uint64(a[1])*(uint64(a[7])<<2) +
747                 uint64(a[2])*(uint64(a[6])<<1) +
748                 uint64(a[3])*(uint64(a[5])<<2) +
749                 uint64(a[4])*uint64(a[4])
750         tmp[9] = uint64(a[1])*(uint64(a[8])<<1) +
751                 uint64(a[2])*(uint64(a[7])<<1) +
752                 uint64(a[3])*(uint64(a[6])<<1) +
753                 uint64(a[4])*(uint64(a[5])<<1)
754         tmp[10] = uint64(a[2])*(uint64(a[8])<<1) +
755                 uint64(a[3])*(uint64(a[7])<<2) +
756                 uint64(a[4])*(uint64(a[6])<<1) +
757                 uint64(a[5])*(uint64(a[5])<<1)
758         tmp[11] = uint64(a[3])*(uint64(a[8])<<1) +
759                 uint64(a[4])*(uint64(a[7])<<1) +
760                 uint64(a[5])*(uint64(a[6])<<1)
761         tmp[12] = uint64(a[4])*(uint64(a[8])<<1) +
762                 uint64(a[5])*(uint64(a[7])<<2) +
763                 uint64(a[6])*uint64(a[6])
764         tmp[13] = uint64(a[5])*(uint64(a[8])<<1) +
765                 uint64(a[6])*(uint64(a[7])<<1)
766         tmp[14] = uint64(a[6])*(uint64(a[8])<<1) +
767                 uint64(a[7])*(uint64(a[7])<<1)
768         tmp[15] = uint64(a[7]) * (uint64(a[8]) << 1)
769         tmp[16] = uint64(a[8]) * uint64(a[8])
770         sm2P256ReduceDegree(b, &tmp)
771 }
772
// nonZeroToAllOnes turns x into a 32-bit mask without branching. It returns:
//
//	0xffffffff for 0 < x <= 2**31
//	0          for x == 0 or x > 2**31.
func nonZeroToAllOnes(x uint32) uint32 {
	// x-1 has its top bit set exactly when x == 0 (it wraps to 0xffffffff)
	// or when x > 2**31, so topBit is 1 in those cases and 0 otherwise.
	topBit := (x - 1) >> 31
	// 0-1 wraps around to all ones; 1-1 is zero.
	return topBit - 1
}
779
// sm2P256Carry is a flattened table of 8 rows by 9 limbs. Row k occupies
// entries k*9 .. k*9+8 and holds k * (2**257 mod P) in the alternating
// 29/28-bit limb layout, where
// 2**257 mod P = 2**225 + 2**97 - 2**65 + 2. Only columns 0, 2, 3 and 7 are
// ever non-zero.
var sm2P256Carry = [8 * 9]uint32{
	0, 0, 0, 0, 0, 0, 0, 0, 0, // carry = 0
	0x2, 0, 0x1fffff00, 0x7ff, 0, 0, 0, 0x2000000, 0, // carry = 1
	0x4, 0, 0x1ffffe00, 0xfff, 0, 0, 0, 0x4000000, 0, // carry = 2
	0x6, 0, 0x1ffffd00, 0x17ff, 0, 0, 0, 0x6000000, 0, // carry = 3
	0x8, 0, 0x1ffffc00, 0x1fff, 0, 0, 0, 0x8000000, 0, // carry = 4
	0xa, 0, 0x1ffffb00, 0x27ff, 0, 0, 0, 0xa000000, 0, // carry = 5
	0xc, 0, 0x1ffffa00, 0x2fff, 0, 0, 0, 0xc000000, 0, // carry = 6
	0xe, 0, 0x1ffff900, 0x37ff, 0, 0, 0, 0xe000000, 0, // carry = 7
}
790
791 // carry < 2 ^ 3
792 func sm2P256ReduceCarry(a *sm2P256FieldElement, carry uint32) {
793         a[0] += sm2P256Carry[carry*9+0]
794         a[2] += sm2P256Carry[carry*9+2]
795         a[3] += sm2P256Carry[carry*9+3]
796         a[7] += sm2P256Carry[carry*9+7]
797 }
798
// sm2P256ReduceDegree converts the 17 64-bit product columns in b into the
// 9-limb field element a.
//
// It works in three stages:
//  1. Repack b into 18 limbs (tmp) of alternating 29 and 28 bits,
//     propagating carries.
//  2. Clear the low nine limbs one at a time by adding x*P, where x is the
//     limb being cleared and P = 2**256 - 2**224 - 2**96 + 2**64 - 1. The
//     "-1" term cancels x itself; the other four terms are added to or
//     subtracted from higher limbs, with explicit borrows between limbs.
//  3. Copy limbs 9..17 into a, re-aligned so that a[0] is a 29-bit limb, and
//     fold the final carry back in with sm2P256ReduceCarry.
//
// The low nine limbs span 5*29 + 4*28 = 257 bits, so dropping them after
// stage 2 divides by 2**257. The result is thus intended to be
// b / 2**257 mod P.
// NOTE(review): this relies on the borrow chains below never underflowing a
// limb; that is not checked here and should be confirmed against the callers'
// input bounds.
//
// Original author's note (translated): this code is really ugly, I am
// appalled at myself. You had better not change it, or you will regret it.
func sm2P256ReduceDegree(a *sm2P256FieldElement, b *sm2P256LargeFieldElement) {
	var tmp [18]uint32
	var carry, x, xMask uint32

	// Stage 1: repack. Column b[k] contributes its low 29 (k even) or 28
	// (k odd) bits to tmp[k], the following bits up to bit 56 to tmp[k+1],
	// and its top 7 bits (57..63) to tmp[k+2].
	//
	// tmp
	// 0  | 1  | 2  | 3  | 4  | 5  | 6  | 7  | 8  |  9 | 10 ...
	// 29 | 28 | 29 | 28 | 29 | 28 | 29 | 28 | 29 | 28 | 29 ...
	tmp[0] = uint32(b[0]) & bottom29Bits
	// Bits 29..56 of b[0]: three from the low word, the rest from the high word.
	tmp[1] = uint32(b[0]) >> 29
	tmp[1] |= (uint32(b[0]>>32) << 3) & bottom28Bits
	tmp[1] += uint32(b[1]) & bottom28Bits
	carry = tmp[1] >> 28
	tmp[1] &= bottom28Bits
	// Each pass handles one even (29-bit) limb and then one odd (28-bit)
	// limb; i is advanced inside the body as well as by the loop header.
	for i := 2; i < 17; i++ {
		// Even limb i: top 7 bits of b[i-2], bits 28..56 of b[i-1],
		// low 29 bits of b[i], plus the incoming carry.
		tmp[i] = (uint32(b[i-2] >> 32)) >> 25
		tmp[i] += (uint32(b[i-1])) >> 28
		tmp[i] += (uint32(b[i-1]>>32) << 4) & bottom29Bits
		tmp[i] += uint32(b[i]) & bottom29Bits
		tmp[i] += carry
		carry = tmp[i] >> 29
		tmp[i] &= bottom29Bits

		i++
		if i == 17 {
			break
		}
		// Odd limb i: top 7 bits of b[i-2], bits 29..56 of b[i-1],
		// low 28 bits of b[i], plus the incoming carry.
		tmp[i] = uint32(b[i-2]>>32) >> 25
		tmp[i] += uint32(b[i-1]) >> 29
		tmp[i] += ((uint32(b[i-1] >> 32)) << 3) & bottom28Bits
		tmp[i] += uint32(b[i]) & bottom28Bits
		tmp[i] += carry
		carry = tmp[i] >> 28
		tmp[i] &= bottom28Bits
	}
	// The last limb collects whatever is left of b[15] and b[16]; it is not
	// masked.
	tmp[17] = uint32(b[15]>>32) >> 25
	tmp[17] += uint32(b[16]) >> 29
	tmp[17] += uint32(b[16]>>32) << 3
	tmp[17] += carry

	// Stage 2: eliminate limbs 0..8. Each pass clears even limb i and then
	// odd limb i+1; the loop exits after the even step of i == 8.
	for i := 0; ; i += 2 {

		// Even limb i (29 bits): push its overflow into limb i+1, then
		// eliminate the remaining value x by adding x*P aligned at limb i.
		// Relative to limb i, the higher limbs start at bit offsets
		// i+2: 57, i+3: 86, i+4: 114, i+7: 200, i+8: 228, i+9: 257.
		tmp[i+1] += tmp[i] >> 29
		x = tmp[i] & bottom29Bits
		tmp[i] = 0
		if x > 0 {
			set4 := uint32(0) // 1 when limb i+3 borrowed from limb i+4
			set7 := uint32(0) // 1 when limb i+6 borrowed from limb i+7
			// x is in (0, 2**29) here, so xMask is always 0xffffffff.
			xMask = nonZeroToAllOnes(x)
			// +x*2**64: bit 64 is bit 7 of limb i+2; the overflow goes
			// to limb i+3.
			tmp[i+2] += (x << 7) & bottom29Bits
			tmp[i+3] += x >> 22
			// -x*2**96: bit 96 is bit 10 of limb i+3. If the limb is
			// small, first add 2**28 to it and remember the borrow.
			if tmp[i+3] < 0x10000000 {
				set4 = 1
				tmp[i+3] += 0x10000000 & xMask
				tmp[i+3] -= (x << 10) & bottom28Bits
			} else {
				tmp[i+3] -= (x << 10) & bottom28Bits
			}
			// High part of -x*2**96 plus the pending borrow come out of
			// limb i+4; the borrow may ripple through limbs i+5 and i+6.
			if tmp[i+4] < 0x20000000 {
				tmp[i+4] += 0x20000000 & xMask
				tmp[i+4] -= set4 // borrow
				tmp[i+4] -= x >> 18
				if tmp[i+5] < 0x10000000 {
					tmp[i+5] += 0x10000000 & xMask
					tmp[i+5] -= 1 // borrow
					if tmp[i+6] < 0x20000000 {
						set7 = 1
						tmp[i+6] += 0x20000000 & xMask
						tmp[i+6] -= 1 // borrow
					} else {
						tmp[i+6] -= 1 // borrow
					}
				} else {
					tmp[i+5] -= 1
				}
			} else {
				tmp[i+4] -= set4 // borrow
				tmp[i+4] -= x >> 18
			}
			// -x*2**224: bit 224 is bit 24 of limb i+7, high part in
			// limb i+8. +x*2**256: bit 256 is bit 28 of limb i+8, high
			// part in limb i+9. The four branches differ only in which
			// limbs had to borrow from their upper neighbour.
			if tmp[i+7] < 0x10000000 {
				tmp[i+7] += 0x10000000 & xMask
				tmp[i+7] -= set7
				tmp[i+7] -= (x << 24) & bottom28Bits
				tmp[i+8] += (x << 28) & bottom29Bits
				if tmp[i+8] < 0x20000000 {
					tmp[i+8] += 0x20000000 & xMask
					tmp[i+8] -= 1
					tmp[i+8] -= x >> 4
					tmp[i+9] += ((x >> 1) - 1) & xMask
				} else {
					tmp[i+8] -= 1
					tmp[i+8] -= x >> 4
					tmp[i+9] += (x >> 1) & xMask
				}
			} else {
				tmp[i+7] -= set7 // borrow
				tmp[i+7] -= (x << 24) & bottom28Bits
				tmp[i+8] += (x << 28) & bottom29Bits
				if tmp[i+8] < 0x20000000 {
					tmp[i+8] += 0x20000000 & xMask
					tmp[i+8] -= x >> 4
					tmp[i+9] += ((x >> 1) - 1) & xMask
				} else {
					tmp[i+8] -= x >> 4
					tmp[i+9] += (x >> 1) & xMask
				}
			}

		}

		// Limb 8 was the last one to clear; limbs 9..17 hold the result.
		if i+1 == 9 {
			break
		}

		// Odd limb i+1 (28 bits): same elimination, aligned at limb i+1.
		// Relative to limb i+1, the higher limbs start at bit offsets
		// i+3: 57, i+4: 85, i+5: 114, i+8: 199, i+9: 228, i+10: 256.
		tmp[i+2] += tmp[i+1] >> 28
		x = tmp[i+1] & bottom28Bits
		tmp[i+1] = 0
		if x > 0 {
			set5 := uint32(0) // 1 when limb i+4 borrowed from limb i+5
			set8 := uint32(0) // 1 when limb i+7 borrowed from limb i+8
			set9 := uint32(0) // 1 when limb i+8 borrowed from limb i+9
			// x is in (0, 2**28) here, so xMask is always 0xffffffff.
			xMask = nonZeroToAllOnes(x)
			// +x*2**64: bit 7 of limb i+3, overflow to limb i+4.
			tmp[i+3] += (x << 7) & bottom28Bits
			tmp[i+4] += x >> 21
			// -x*2**96: bit 11 of limb i+4, high part in limb i+5.
			if tmp[i+4] < 0x20000000 {
				set5 = 1
				tmp[i+4] += 0x20000000 & xMask
				tmp[i+4] -= (x << 11) & bottom29Bits
			} else {
				tmp[i+4] -= (x << 11) & bottom29Bits
			}
			if tmp[i+5] < 0x10000000 {
				tmp[i+5] += 0x10000000 & xMask
				tmp[i+5] -= set5 // borrow
				tmp[i+5] -= x >> 18
				if tmp[i+6] < 0x20000000 {
					tmp[i+6] += 0x20000000 & xMask
					tmp[i+6] -= 1 // borrow
					if tmp[i+7] < 0x10000000 {
						set8 = 1
						tmp[i+7] += 0x10000000 & xMask
						tmp[i+7] -= 1 // borrow
					} else {
						tmp[i+7] -= 1 // borrow
					}
				} else {
					tmp[i+6] -= 1 // borrow
				}
			} else {
				tmp[i+5] -= set5 // borrow
				tmp[i+5] -= x >> 18
			}
			// -x*2**224: bit 25 of limb i+8, high part in limb i+9.
			if tmp[i+8] < 0x20000000 {
				set9 = 1
				tmp[i+8] += 0x20000000 & xMask
				tmp[i+8] -= set8
				tmp[i+8] -= (x << 25) & bottom29Bits
			} else {
				tmp[i+8] -= set8
				tmp[i+8] -= (x << 25) & bottom29Bits
			}
			// +x*2**256 lands exactly on limb i+10; one is deducted from
			// it when limb i+9 had to borrow.
			if tmp[i+9] < 0x10000000 {
				tmp[i+9] += 0x10000000 & xMask
				tmp[i+9] -= set9 // borrow
				tmp[i+9] -= x >> 4
				tmp[i+10] += (x - 1) & xMask
			} else {
				tmp[i+9] -= set9 // borrow
				tmp[i+9] -= x >> 4
				tmp[i+10] += x & xMask
			}
		}
	}

	// Stage 3: tmp[9] is a 28-bit limb but a[0] must hold 29 bits, so every
	// even output limb takes the low bit of the following tmp limb as its
	// bit 28, and every odd output limb is that tmp limb shifted right by
	// one. i is advanced inside the body as well as by the loop header.
	carry = uint32(0)
	for i := 0; i < 8; i++ {
		a[i] = tmp[i+9]
		a[i] += carry
		a[i] += (tmp[i+10] << 28) & bottom29Bits
		carry = a[i] >> 29
		a[i] &= bottom29Bits

		i++
		a[i] = tmp[i+9] >> 1
		a[i] += carry
		carry = a[i] >> 28
		a[i] &= bottom28Bits
	}
	a[8] = tmp[17]
	a[8] += carry
	carry = a[8] >> 29
	a[8] &= bottom29Bits
	// Whatever overflowed the top limb is folded back in via the carry table.
	sm2P256ReduceCarry(a, carry)
}
994
995 // b = a
996 func sm2P256Dup(b, a *sm2P256FieldElement) {
997         *b = *a
998 }
999
1000 // X = a * R mod P
1001 func sm2P256FromBig(X *sm2P256FieldElement, a *big.Int) {
1002         x := new(big.Int).Lsh(a, 257)
1003         x.Mod(x, sm2P256.P)
1004         for i := 0; i < 9; i++ {
1005                 if bits := x.Bits(); len(bits) > 0 {
1006                         X[i] = uint32(bits[0]) & bottom29Bits
1007                 } else {
1008                         X[i] = 0
1009                 }
1010                 x.Rsh(x, 29)
1011                 i++
1012                 if i == 9 {
1013                         break
1014                 }
1015                 if bits := x.Bits(); len(bits) > 0 {
1016                         X[i] = uint32(bits[0]) & bottom28Bits
1017                 } else {
1018                         X[i] = 0
1019                 }
1020                 x.Rsh(x, 28)
1021         }
1022 }
1023
1024 // X = r * R mod P
1025 // r = X * R' mod P
1026 func sm2P256ToBig(X *sm2P256FieldElement) *big.Int {
1027         r, tm := new(big.Int), new(big.Int)
1028         r.SetInt64(int64(X[8]))
1029         for i := 7; i >= 0; i-- {
1030                 if (i & 1) == 0 {
1031                         r.Lsh(r, 29)
1032                 } else {
1033                         r.Lsh(r, 28)
1034                 }
1035                 tm.SetInt64(int64(X[i]))
1036                 r.Add(r, tm)
1037         }
1038         r.Mul(r, sm2P256.RInverse)
1039         r.Mod(r, sm2P256.P)
1040         return r
1041 }