1 // I retain copyright in this code but I encourage its free use provided
2 // that I don't carry any responsibility for the results. I am especially
3 // happy to see it used in free and open source software. If you do use
4 // it I would appreciate an acknowledgement of its origin in the code or
5 // the product that results and I would also appreciate knowing a little
6 // about the use to which it is being put. I am grateful to Frank Yellin
7 // for some ideas that are used in this implementation.
9 // Dr B. R. Gladman <brg@gladman.uk.net> 6th April 2001.
11 // This is an implementation of the AES encryption algorithm (Rijndael)
12 // designed by Joan Daemen and Vincent Rijmen. This version is designed
13 // to provide both fixed and dynamic block and key lengths and can also
14 // run with either big or little endian internal byte order (see aes.h).
15 // It inputs block and key lengths in bytes with the legal values being
19 * Modified by Jari Ruusu, May 1 2001
20 * - Fixed some compile warnings, code was ok but gcc warned anyway.
21 * - Changed basic types: byte -> unsigned char, word -> u_int32_t
22 * - Major name space cleanup: Names visible to outside now begin
23 * with "aes_" or "AES_". A lot of stuff moved from aes.h to aes.c
24 * - Removed C++ and DLL support as part of name space cleanup.
25 * - Eliminated unnecessary recomputation of tables. (actual bug fix)
26 * - Merged precomputed constant tables to aes.c file.
27 * - Removed data alignment restrictions for portability reasons.
28 * - Made block and key lengths accept bit count (128/192/256)
29 * as well as byte count (16/24/32).
30 * - Removed all error checks. This change also eliminated the need
31 * to preinitialize the context struct to zero.
32 * - Removed some totally unused constants.
37 // CONFIGURATION OPTIONS (see also aes.h)
39 // 1. Define UNROLL for full loop unrolling in encryption and decryption.
40 // 2. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption.
41 // 3. Define FIXED_TABLES for compiled rather than dynamic tables.
42 // 4. Define FF_TABLES to use tables for field multiplies and inverses.
43 // Do not enable this without understanding stack space requirements.
44 // 5. Define ARRAYS to use arrays to hold the local state block. If this
45 // is not defined, individually declared 32-bit words are used.
46 // 6. Define FAST_VARIABLE if a high speed variable block implementation
47 // is needed (essentially three separate fixed block size code sequences)
48 // 7. Define either ONE_TABLE or FOUR_TABLES for a fast table driven
49 // version using 1 table (2 kbytes of table space) or 4 tables (8
50 // kbytes of table space) for higher speed.
51 // 8. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed
52 // increase by using tables for the last rounds but with more table
53 // space (2 or 8 kbytes extra).
54 // 9. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but
55 // slower version is provided.
56 // 10. If fast decryption key scheduling is needed define ONE_IM_TABLE
57 // or FOUR_IM_TABLES for higher speed (2 or 8 kbytes extra).
59 #if defined(CONFIG_ARCH_IXP425) || defined(CONFIG_ARCH_IXP4XX)
60 #define PARTIAL_UNROLL
65 #define PARTIAL_UNROLL
70 #if defined(__SH3__) || defined(__SH4__)
71 /* How to distinguish the 166MHz part from the 240MHz part?
72 * They've different caches and probably different "best" here.
77 #define FOUR_IM_TABLES
80 /* If the asm version isn't used these settings seem best for the SC520 */
84 #define FOUR_IM_TABLES
87 /* Coldfire processors have a few problems. Code alignment seems to make
88 * a difference :( These settings are for configurations that seem stable
89 * with respect to several compilations and which are fastest some of the
90 * time (i.e. fastest sometimes and always good).
92 /* How to pick CPU type? This should work but isn't nice */
93 #include <linux/autoconf.h>
100 #define PARTIAL_UNROLL
102 #define FAST_VARIABLE
105 #define FOUR_IM_TABLES
112 #define FOUR_IM_TABLES
114 /* Defaults... no idea what they are optimal for :-) */
115 #error tune your aes please
118 //#define PARTIAL_UNROLL
123 #define FAST_VARIABLE
128 //#define ONE_LR_TABLE
129 #define FOUR_LR_TABLES
131 //#define ONE_IM_TABLE
132 #define FOUR_IM_TABLES
141 #if defined(UNROLL) && defined (PARTIAL_UNROLL)
142 #error both UNROLL and PARTIAL_UNROLL are defined
145 #if defined(ONE_TABLE) && defined (FOUR_TABLES)
146 #error both ONE_TABLE and FOUR_TABLES are defined
149 #if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES)
150 #error both ONE_LR_TABLE and FOUR_LR_TABLES are defined
153 #if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES)
154 #error both ONE_IM_TABLE and FOUR_IM_TABLES are defined
157 #if defined(AES_BLOCK_SIZE) && AES_BLOCK_SIZE != 16 && AES_BLOCK_SIZE != 24 && AES_BLOCK_SIZE != 32
158 #error an illegal block size has been specified
161 // upr(x,n): rotates bytes within words by n positions, moving bytes
162 // to higher index positions with wrap around into low positions
163 // ups(x,n): moves bytes by n positions to higher index positions in
164 // words but without wrap around
165 // bval(x,n): extracts a byte from a word
// NOTE(review): upr computes its right-shift count as 32 - 8 * n, so n == 0
// would shift by 32; callers presumably only pass n in 1..3 — confirm.
167 #define upr(x,n) (((x) << 8 * (n)) | ((x) >> (32 - 8 * (n))))
168 #define ups(x,n) ((x) << 8 * (n))
169 #define bval(x,n) ((unsigned char)((x) >> 8 * (n)))
// bytes2word(b0,b1,b2,b3): packs four bytes into one 32-bit word with b0 in
// the least significant byte and b3 in the most significant byte
170 #define bytes2word(b0, b1, b2, b3) \
171 ((u_int32_t)(b3) << 24 | (u_int32_t)(b2) << 16 | (u_int32_t)(b1) << 8 | (b0))
173 #if defined(i386) || defined(_I386) || defined(__i386__) || defined(__i386)
174 /* little endian processor without data alignment restrictions */
// word_in(x):       read a 32-bit word at address x through a direct pointer cast
// const_word_in(x): the same read through a pointer to const
// word_out(x,v):    store the 32-bit value v at address x
// NOTE(review): x is dereferenced as a u_int32_t whatever its real type or
// alignment, so these depend on the target tolerating unaligned access —
// confirm the aliasing this implies is acceptable for the compiler in use.
175 #define word_in(x) *(u_int32_t*)(x)
176 #define const_word_in(x) *(const u_int32_t*)(x)
177 #define word_out(x,v) *(u_int32_t*)(x) = (v)
179 /* slower but generic big endian or with data alignment restrictions */
/*
 * Byte-at-a-time word accessors for targets that are big endian or that have
 * data alignment restrictions.  The word is always laid out with its least
 * significant byte at x[0] and its most significant byte at x[3].
 *
 *   word_in(x)        read the four bytes at x and return them as a u_int32_t
 *   const_word_in(x)  same value as word_in; kept as a separate name for
 *                     callers that hold a pointer to const
 *   word_out(x,v)     store the 32-bit value v into the four bytes at x
 *
 * The reads go through a pointer to const so the qualifier of x is never cast
 * away.  Every expansion is a single parenthesized expression, so the macros
 * can be used inside larger expressions, initializers and argument lists.
 * x and v are evaluated more than once and must be free of side effects.
 */
#define word_in(x) \
    ( (u_int32_t)(((const unsigned char *)(x))[0])        \
    | ((u_int32_t)(((const unsigned char *)(x))[1]) << 8)  \
    | ((u_int32_t)(((const unsigned char *)(x))[2]) << 16) \
    | ((u_int32_t)(((const unsigned char *)(x))[3]) << 24))
#define const_word_in(x) \
    ( (u_int32_t)(((const unsigned char *)(x))[0])        \
    | ((u_int32_t)(((const unsigned char *)(x))[1]) << 8)  \
    | ((u_int32_t)(((const unsigned char *)(x))[2]) << 16) \
    | ((u_int32_t)(((const unsigned char *)(x))[3]) << 24))
#define word_out(x,v) \
    ( ((unsigned char *)(x))[0] = (unsigned char)(v),         \
      ((unsigned char *)(x))[1] = (unsigned char)((v) >> 8),  \
      ((unsigned char *)(x))[2] = (unsigned char)((v) >> 16), \
      ((unsigned char *)(x))[3] = (unsigned char)((v) >> 24))
185 // Disable at least some poor combinations of options
187 #if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
191 #undef FOUR_LR_TABLES
193 #undef FOUR_IM_TABLES
194 #elif !defined(FOUR_TABLES)
195 #ifdef FOUR_LR_TABLES
196 #undef FOUR_LR_TABLES
199 #ifdef FOUR_IM_TABLES
200 #undef FOUR_IM_TABLES
203 #elif !defined(AES_BLOCK_SIZE)
205 #define PARTIAL_UNROLL
210 // the finite field modular polynomial and elements
212 #define ff_poly 0x011b
215 // multiply four bytes in GF(2^8) by 'x' {02} in parallel
// m1 selects the top bit of each byte and m2 the low seven bits of each byte;
// m3 (the low byte of ff_poly) is XORed into every byte whose top bit was set.
// FFmulX evaluates its argument twice.
217 #define m1 0x80808080
218 #define m2 0x7f7f7f7f
219 #define m3 0x0000001b
220 #define FFmulX(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * m3))
222 // The following defines provide alternative definitions of FFmulX that might
223 // give improved performance if a fast 32-bit multiply is not available. Note
224 // that a temporary variable u needs to be defined where FFmulX is used.
226 // #define FFmulX(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ ((u >> 3) | (u >> 6))
227 // #define m4 0x1b1b1b1b
228 // #define FFmulX(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) & m4)
230 // perform column mix operation on four bytes in parallel
// fwd_mcol(x): forward column mix of the four bytes packed in x. Output byte
// r is {02}*b[r] ^ {03}*b[r+1] ^ b[r+2] ^ b[r+3] (indices modulo 4).
// Requires a 32-bit temporary named f2 in the caller's scope. x is evaluated
// several times and is parenthesized so compound arguments expand correctly.
#define fwd_mcol(x) (f2 = FFmulX(x), f2 ^ upr((x) ^ f2,3) ^ upr((x),2) ^ upr((x),1))
234 #if defined(FIXED_TABLES)
238 static const unsigned char s_box[256] =
240 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
241 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
242 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
243 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
244 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
245 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
246 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
247 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
248 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
249 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
250 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
251 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
252 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
253 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
254 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
255 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
256 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
257 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
258 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
259 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
260 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
261 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
262 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
263 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
264 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
265 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
266 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
267 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
268 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
269 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
270 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
271 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
274 // the inverse S-Box table
276 static const unsigned char inv_s_box[256] =
278 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
279 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
280 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
281 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
282 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
283 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
284 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
285 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
286 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
287 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
288 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
289 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
290 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
291 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
292 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
293 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
294 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
295 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
296 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
297 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
298 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
299 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
300 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
301 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
302 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
303 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
304 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
305 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
306 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
307 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
308 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
309 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
312 #define w0(p) 0x000000##p
314 // Number of elements required in this table for different
315 // block and key lengths is:
323 // this table can be a table of bytes if the key schedule
324 // code is adjusted accordingly
326 static const u_int32_t rcon_tab[29] =
328 w0(01), w0(02), w0(04), w0(08),
329 w0(10), w0(20), w0(40), w0(80),
330 w0(1b), w0(36), w0(6c), w0(d8),
331 w0(ab), w0(4d), w0(9a), w0(2f),
332 w0(5e), w0(bc), w0(63), w0(c6),
333 w0(97), w0(35), w0(6a), w0(d4),
334 w0(b3), w0(7d), w0(fa), w0(ef),
// rN(p,q,r,s): paste four two-digit hex byte tokens into one 32-bit hex
// literal. r0 keeps the order p,q,r,s (p most significant); r1..r3 rotate that
// byte order left by N positions.
340 #define r0(p,q,r,s) 0x##p##q##r##s
341 #define r1(p,q,r,s) 0x##q##r##s##p
342 #define r2(p,q,r,s) 0x##r##s##p##q
343 #define r3(p,q,r,s) 0x##s##p##q##r
// wN(p): 32-bit hex literal holding the byte token p in byte position N
// (N = 0 is the least significant byte) with all other bytes zero.
344 #define w0(p) 0x000000##p
345 #define w1(p) 0x0000##p##00
346 #define w2(p) 0x00##p##0000
347 #define w3(p) 0x##p##000000
349 #if defined(FIXED_TABLES) && (defined(ONE_TABLE) || defined(FOUR_TABLES))
351 // data for forward tables (other than last round)
354 r(a5,63,63,c6), r(84,7c,7c,f8), r(99,77,77,ee), r(8d,7b,7b,f6),\
355 r(0d,f2,f2,ff), r(bd,6b,6b,d6), r(b1,6f,6f,de), r(54,c5,c5,91),\
356 r(50,30,30,60), r(03,01,01,02), r(a9,67,67,ce), r(7d,2b,2b,56),\
357 r(19,fe,fe,e7), r(62,d7,d7,b5), r(e6,ab,ab,4d), r(9a,76,76,ec),\
358 r(45,ca,ca,8f), r(9d,82,82,1f), r(40,c9,c9,89), r(87,7d,7d,fa),\
359 r(15,fa,fa,ef), r(eb,59,59,b2), r(c9,47,47,8e), r(0b,f0,f0,fb),\
360 r(ec,ad,ad,41), r(67,d4,d4,b3), r(fd,a2,a2,5f), r(ea,af,af,45),\
361 r(bf,9c,9c,23), r(f7,a4,a4,53), r(96,72,72,e4), r(5b,c0,c0,9b),\
362 r(c2,b7,b7,75), r(1c,fd,fd,e1), r(ae,93,93,3d), r(6a,26,26,4c),\
363 r(5a,36,36,6c), r(41,3f,3f,7e), r(02,f7,f7,f5), r(4f,cc,cc,83),\
364 r(5c,34,34,68), r(f4,a5,a5,51), r(34,e5,e5,d1), r(08,f1,f1,f9),\
365 r(93,71,71,e2), r(73,d8,d8,ab), r(53,31,31,62), r(3f,15,15,2a),\
366 r(0c,04,04,08), r(52,c7,c7,95), r(65,23,23,46), r(5e,c3,c3,9d),\
367 r(28,18,18,30), r(a1,96,96,37), r(0f,05,05,0a), r(b5,9a,9a,2f),\
368 r(09,07,07,0e), r(36,12,12,24), r(9b,80,80,1b), r(3d,e2,e2,df),\
369 r(26,eb,eb,cd), r(69,27,27,4e), r(cd,b2,b2,7f), r(9f,75,75,ea),\
370 r(1b,09,09,12), r(9e,83,83,1d), r(74,2c,2c,58), r(2e,1a,1a,34),\
371 r(2d,1b,1b,36), r(b2,6e,6e,dc), r(ee,5a,5a,b4), r(fb,a0,a0,5b),\
372 r(f6,52,52,a4), r(4d,3b,3b,76), r(61,d6,d6,b7), r(ce,b3,b3,7d),\
373 r(7b,29,29,52), r(3e,e3,e3,dd), r(71,2f,2f,5e), r(97,84,84,13),\
374 r(f5,53,53,a6), r(68,d1,d1,b9), r(00,00,00,00), r(2c,ed,ed,c1),\
375 r(60,20,20,40), r(1f,fc,fc,e3), r(c8,b1,b1,79), r(ed,5b,5b,b6),\
376 r(be,6a,6a,d4), r(46,cb,cb,8d), r(d9,be,be,67), r(4b,39,39,72),\
377 r(de,4a,4a,94), r(d4,4c,4c,98), r(e8,58,58,b0), r(4a,cf,cf,85),\
378 r(6b,d0,d0,bb), r(2a,ef,ef,c5), r(e5,aa,aa,4f), r(16,fb,fb,ed),\
379 r(c5,43,43,86), r(d7,4d,4d,9a), r(55,33,33,66), r(94,85,85,11),\
380 r(cf,45,45,8a), r(10,f9,f9,e9), r(06,02,02,04), r(81,7f,7f,fe),\
381 r(f0,50,50,a0), r(44,3c,3c,78), r(ba,9f,9f,25), r(e3,a8,a8,4b),\
382 r(f3,51,51,a2), r(fe,a3,a3,5d), r(c0,40,40,80), r(8a,8f,8f,05),\
383 r(ad,92,92,3f), r(bc,9d,9d,21), r(48,38,38,70), r(04,f5,f5,f1),\
384 r(df,bc,bc,63), r(c1,b6,b6,77), r(75,da,da,af), r(63,21,21,42),\
385 r(30,10,10,20), r(1a,ff,ff,e5), r(0e,f3,f3,fd), r(6d,d2,d2,bf),\
386 r(4c,cd,cd,81), r(14,0c,0c,18), r(35,13,13,26), r(2f,ec,ec,c3),\
387 r(e1,5f,5f,be), r(a2,97,97,35), r(cc,44,44,88), r(39,17,17,2e),\
388 r(57,c4,c4,93), r(f2,a7,a7,55), r(82,7e,7e,fc), r(47,3d,3d,7a),\
389 r(ac,64,64,c8), r(e7,5d,5d,ba), r(2b,19,19,32), r(95,73,73,e6),\
390 r(a0,60,60,c0), r(98,81,81,19), r(d1,4f,4f,9e), r(7f,dc,dc,a3),\
391 r(66,22,22,44), r(7e,2a,2a,54), r(ab,90,90,3b), r(83,88,88,0b),\
392 r(ca,46,46,8c), r(29,ee,ee,c7), r(d3,b8,b8,6b), r(3c,14,14,28),\
393 r(79,de,de,a7), r(e2,5e,5e,bc), r(1d,0b,0b,16), r(76,db,db,ad),\
394 r(3b,e0,e0,db), r(56,32,32,64), r(4e,3a,3a,74), r(1e,0a,0a,14),\
395 r(db,49,49,92), r(0a,06,06,0c), r(6c,24,24,48), r(e4,5c,5c,b8),\
396 r(5d,c2,c2,9f), r(6e,d3,d3,bd), r(ef,ac,ac,43), r(a6,62,62,c4),\
397 r(a8,91,91,39), r(a4,95,95,31), r(37,e4,e4,d3), r(8b,79,79,f2),\
398 r(32,e7,e7,d5), r(43,c8,c8,8b), r(59,37,37,6e), r(b7,6d,6d,da),\
399 r(8c,8d,8d,01), r(64,d5,d5,b1), r(d2,4e,4e,9c), r(e0,a9,a9,49),\
400 r(b4,6c,6c,d8), r(fa,56,56,ac), r(07,f4,f4,f3), r(25,ea,ea,cf),\
401 r(af,65,65,ca), r(8e,7a,7a,f4), r(e9,ae,ae,47), r(18,08,08,10),\
402 r(d5,ba,ba,6f), r(88,78,78,f0), r(6f,25,25,4a), r(72,2e,2e,5c),\
403 r(24,1c,1c,38), r(f1,a6,a6,57), r(c7,b4,b4,73), r(51,c6,c6,97),\
404 r(23,e8,e8,cb), r(7c,dd,dd,a1), r(9c,74,74,e8), r(21,1f,1f,3e),\
405 r(dd,4b,4b,96), r(dc,bd,bd,61), r(86,8b,8b,0d), r(85,8a,8a,0f),\
406 r(90,70,70,e0), r(42,3e,3e,7c), r(c4,b5,b5,71), r(aa,66,66,cc),\
407 r(d8,48,48,90), r(05,03,03,06), r(01,f6,f6,f7), r(12,0e,0e,1c),\
408 r(a3,61,61,c2), r(5f,35,35,6a), r(f9,57,57,ae), r(d0,b9,b9,69),\
409 r(91,86,86,17), r(58,c1,c1,99), r(27,1d,1d,3a), r(b9,9e,9e,27),\
410 r(38,e1,e1,d9), r(13,f8,f8,eb), r(b3,98,98,2b), r(33,11,11,22),\
411 r(bb,69,69,d2), r(70,d9,d9,a9), r(89,8e,8e,07), r(a7,94,94,33),\
412 r(b6,9b,9b,2d), r(22,1e,1e,3c), r(92,87,87,15), r(20,e9,e9,c9),\
413 r(49,ce,ce,87), r(ff,55,55,aa), r(78,28,28,50), r(7a,df,df,a5),\
414 r(8f,8c,8c,03), r(f8,a1,a1,59), r(80,89,89,09), r(17,0d,0d,1a),\
415 r(da,bf,bf,65), r(31,e6,e6,d7), r(c6,42,42,84), r(b8,68,68,d0),\
416 r(c3,41,41,82), r(b0,99,99,29), r(77,2d,2d,5a), r(11,0f,0f,1e),\
417 r(cb,b0,b0,7b), r(fc,54,54,a8), r(d6,bb,bb,6d), r(3a,16,16,2c)
419 // data for inverse tables (other than last round)
422 r(50,a7,f4,51), r(53,65,41,7e), r(c3,a4,17,1a), r(96,5e,27,3a),\
423 r(cb,6b,ab,3b), r(f1,45,9d,1f), r(ab,58,fa,ac), r(93,03,e3,4b),\
424 r(55,fa,30,20), r(f6,6d,76,ad), r(91,76,cc,88), r(25,4c,02,f5),\
425 r(fc,d7,e5,4f), r(d7,cb,2a,c5), r(80,44,35,26), r(8f,a3,62,b5),\
426 r(49,5a,b1,de), r(67,1b,ba,25), r(98,0e,ea,45), r(e1,c0,fe,5d),\
427 r(02,75,2f,c3), r(12,f0,4c,81), r(a3,97,46,8d), r(c6,f9,d3,6b),\
428 r(e7,5f,8f,03), r(95,9c,92,15), r(eb,7a,6d,bf), r(da,59,52,95),\
429 r(2d,83,be,d4), r(d3,21,74,58), r(29,69,e0,49), r(44,c8,c9,8e),\
430 r(6a,89,c2,75), r(78,79,8e,f4), r(6b,3e,58,99), r(dd,71,b9,27),\
431 r(b6,4f,e1,be), r(17,ad,88,f0), r(66,ac,20,c9), r(b4,3a,ce,7d),\
432 r(18,4a,df,63), r(82,31,1a,e5), r(60,33,51,97), r(45,7f,53,62),\
433 r(e0,77,64,b1), r(84,ae,6b,bb), r(1c,a0,81,fe), r(94,2b,08,f9),\
434 r(58,68,48,70), r(19,fd,45,8f), r(87,6c,de,94), r(b7,f8,7b,52),\
435 r(23,d3,73,ab), r(e2,02,4b,72), r(57,8f,1f,e3), r(2a,ab,55,66),\
436 r(07,28,eb,b2), r(03,c2,b5,2f), r(9a,7b,c5,86), r(a5,08,37,d3),\
437 r(f2,87,28,30), r(b2,a5,bf,23), r(ba,6a,03,02), r(5c,82,16,ed),\
438 r(2b,1c,cf,8a), r(92,b4,79,a7), r(f0,f2,07,f3), r(a1,e2,69,4e),\
439 r(cd,f4,da,65), r(d5,be,05,06), r(1f,62,34,d1), r(8a,fe,a6,c4),\
440 r(9d,53,2e,34), r(a0,55,f3,a2), r(32,e1,8a,05), r(75,eb,f6,a4),\
441 r(39,ec,83,0b), r(aa,ef,60,40), r(06,9f,71,5e), r(51,10,6e,bd),\
442 r(f9,8a,21,3e), r(3d,06,dd,96), r(ae,05,3e,dd), r(46,bd,e6,4d),\
443 r(b5,8d,54,91), r(05,5d,c4,71), r(6f,d4,06,04), r(ff,15,50,60),\
444 r(24,fb,98,19), r(97,e9,bd,d6), r(cc,43,40,89), r(77,9e,d9,67),\
445 r(bd,42,e8,b0), r(88,8b,89,07), r(38,5b,19,e7), r(db,ee,c8,79),\
446 r(47,0a,7c,a1), r(e9,0f,42,7c), r(c9,1e,84,f8), r(00,00,00,00),\
447 r(83,86,80,09), r(48,ed,2b,32), r(ac,70,11,1e), r(4e,72,5a,6c),\
448 r(fb,ff,0e,fd), r(56,38,85,0f), r(1e,d5,ae,3d), r(27,39,2d,36),\
449 r(64,d9,0f,0a), r(21,a6,5c,68), r(d1,54,5b,9b), r(3a,2e,36,24),\
450 r(b1,67,0a,0c), r(0f,e7,57,93), r(d2,96,ee,b4), r(9e,91,9b,1b),\
451 r(4f,c5,c0,80), r(a2,20,dc,61), r(69,4b,77,5a), r(16,1a,12,1c),\
452 r(0a,ba,93,e2), r(e5,2a,a0,c0), r(43,e0,22,3c), r(1d,17,1b,12),\
453 r(0b,0d,09,0e), r(ad,c7,8b,f2), r(b9,a8,b6,2d), r(c8,a9,1e,14),\
454 r(85,19,f1,57), r(4c,07,75,af), r(bb,dd,99,ee), r(fd,60,7f,a3),\
455 r(9f,26,01,f7), r(bc,f5,72,5c), r(c5,3b,66,44), r(34,7e,fb,5b),\
456 r(76,29,43,8b), r(dc,c6,23,cb), r(68,fc,ed,b6), r(63,f1,e4,b8),\
457 r(ca,dc,31,d7), r(10,85,63,42), r(40,22,97,13), r(20,11,c6,84),\
458 r(7d,24,4a,85), r(f8,3d,bb,d2), r(11,32,f9,ae), r(6d,a1,29,c7),\
459 r(4b,2f,9e,1d), r(f3,30,b2,dc), r(ec,52,86,0d), r(d0,e3,c1,77),\
460 r(6c,16,b3,2b), r(99,b9,70,a9), r(fa,48,94,11), r(22,64,e9,47),\
461 r(c4,8c,fc,a8), r(1a,3f,f0,a0), r(d8,2c,7d,56), r(ef,90,33,22),\
462 r(c7,4e,49,87), r(c1,d1,38,d9), r(fe,a2,ca,8c), r(36,0b,d4,98),\
463 r(cf,81,f5,a6), r(28,de,7a,a5), r(26,8e,b7,da), r(a4,bf,ad,3f),\
464 r(e4,9d,3a,2c), r(0d,92,78,50), r(9b,cc,5f,6a), r(62,46,7e,54),\
465 r(c2,13,8d,f6), r(e8,b8,d8,90), r(5e,f7,39,2e), r(f5,af,c3,82),\
466 r(be,80,5d,9f), r(7c,93,d0,69), r(a9,2d,d5,6f), r(b3,12,25,cf),\
467 r(3b,99,ac,c8), r(a7,7d,18,10), r(6e,63,9c,e8), r(7b,bb,3b,db),\
468 r(09,78,26,cd), r(f4,18,59,6e), r(01,b7,9a,ec), r(a8,9a,4f,83),\
469 r(65,6e,95,e6), r(7e,e6,ff,aa), r(08,cf,bc,21), r(e6,e8,15,ef),\
470 r(d9,9b,e7,ba), r(ce,36,6f,4a), r(d4,09,9f,ea), r(d6,7c,b0,29),\
471 r(af,b2,a4,31), r(31,23,3f,2a), r(30,94,a5,c6), r(c0,66,a2,35),\
472 r(37,bc,4e,74), r(a6,ca,82,fc), r(b0,d0,90,e0), r(15,d8,a7,33),\
473 r(4a,98,04,f1), r(f7,da,ec,41), r(0e,50,cd,7f), r(2f,f6,91,17),\
474 r(8d,d6,4d,76), r(4d,b0,ef,43), r(54,4d,aa,cc), r(df,04,96,e4),\
475 r(e3,b5,d1,9e), r(1b,88,6a,4c), r(b8,1f,2c,c1), r(7f,51,65,46),\
476 r(04,ea,5e,9d), r(5d,35,8c,01), r(73,74,87,fa), r(2e,41,0b,fb),\
477 r(5a,1d,67,b3), r(52,d2,db,92), r(33,56,10,e9), r(13,47,d6,6d),\
478 r(8c,61,d7,9a), r(7a,0c,a1,37), r(8e,14,f8,59), r(89,3c,13,eb),\
479 r(ee,27,a9,ce), r(35,c9,61,b7), r(ed,e5,1c,e1), r(3c,b1,47,7a),\
480 r(59,df,d2,9c), r(3f,73,f2,55), r(79,ce,14,18), r(bf,37,c7,73),\
481 r(ea,cd,f7,53), r(5b,aa,fd,5f), r(14,6f,3d,df), r(86,db,44,78),\
482 r(81,f3,af,ca), r(3e,c4,68,b9), r(2c,34,24,38), r(5f,40,a3,c2),\
483 r(72,c3,1d,16), r(0c,25,e2,bc), r(8b,49,3c,28), r(41,95,0d,ff),\
484 r(71,01,a8,39), r(de,b3,0c,08), r(9c,e4,b4,d8), r(90,c1,56,64),\
485 r(61,84,cb,7b), r(70,b6,32,d5), r(74,5c,6c,48), r(42,57,b8,d0)
487 // generate the required tables in the desired endian format
492 #if defined(ONE_TABLE)
493 static const u_int32_t ft_tab[256] =
495 #elif defined(FOUR_TABLES)
496 static const u_int32_t ft_tab[4][256] =
512 #if defined(ONE_TABLE)
513 static const u_int32_t it_tab[256] =
515 #elif defined(FOUR_TABLES)
516 static const u_int32_t it_tab[4][256] =
532 #if defined(FIXED_TABLES) && (defined(ONE_LR_TABLE) || defined(FOUR_LR_TABLES))
534 // data for inverse tables (last round)
537 w(52), w(09), w(6a), w(d5), w(30), w(36), w(a5), w(38),\
538 w(bf), w(40), w(a3), w(9e), w(81), w(f3), w(d7), w(fb),\
539 w(7c), w(e3), w(39), w(82), w(9b), w(2f), w(ff), w(87),\
540 w(34), w(8e), w(43), w(44), w(c4), w(de), w(e9), w(cb),\
541 w(54), w(7b), w(94), w(32), w(a6), w(c2), w(23), w(3d),\
542 w(ee), w(4c), w(95), w(0b), w(42), w(fa), w(c3), w(4e),\
543 w(08), w(2e), w(a1), w(66), w(28), w(d9), w(24), w(b2),\
544 w(76), w(5b), w(a2), w(49), w(6d), w(8b), w(d1), w(25),\
545 w(72), w(f8), w(f6), w(64), w(86), w(68), w(98), w(16),\
546 w(d4), w(a4), w(5c), w(cc), w(5d), w(65), w(b6), w(92),\
547 w(6c), w(70), w(48), w(50), w(fd), w(ed), w(b9), w(da),\
548 w(5e), w(15), w(46), w(57), w(a7), w(8d), w(9d), w(84),\
549 w(90), w(d8), w(ab), w(00), w(8c), w(bc), w(d3), w(0a),\
550 w(f7), w(e4), w(58), w(05), w(b8), w(b3), w(45), w(06),\
551 w(d0), w(2c), w(1e), w(8f), w(ca), w(3f), w(0f), w(02),\
552 w(c1), w(af), w(bd), w(03), w(01), w(13), w(8a), w(6b),\
553 w(3a), w(91), w(11), w(41), w(4f), w(67), w(dc), w(ea),\
554 w(97), w(f2), w(cf), w(ce), w(f0), w(b4), w(e6), w(73),\
555 w(96), w(ac), w(74), w(22), w(e7), w(ad), w(35), w(85),\
556 w(e2), w(f9), w(37), w(e8), w(1c), w(75), w(df), w(6e),\
557 w(47), w(f1), w(1a), w(71), w(1d), w(29), w(c5), w(89),\
558 w(6f), w(b7), w(62), w(0e), w(aa), w(18), w(be), w(1b),\
559 w(fc), w(56), w(3e), w(4b), w(c6), w(d2), w(79), w(20),\
560 w(9a), w(db), w(c0), w(fe), w(78), w(cd), w(5a), w(f4),\
561 w(1f), w(dd), w(a8), w(33), w(88), w(07), w(c7), w(31),\
562 w(b1), w(12), w(10), w(59), w(27), w(80), w(ec), w(5f),\
563 w(60), w(51), w(7f), w(a9), w(19), w(b5), w(4a), w(0d),\
564 w(2d), w(e5), w(7a), w(9f), w(93), w(c9), w(9c), w(ef),\
565 w(a0), w(e0), w(3b), w(4d), w(ae), w(2a), w(f5), w(b0),\
566 w(c8), w(eb), w(bb), w(3c), w(83), w(53), w(99), w(61),\
567 w(17), w(2b), w(04), w(7e), w(ba), w(77), w(d6), w(26),\
568 w(e1), w(69), w(14), w(63), w(55), w(21), w(0c), w(7d),
570 // generate the required tables in the desired endian format
573 #define r(p,q,r,s) w0(q)
574 #if defined(ONE_LR_TABLE)
575 static const u_int32_t fl_tab[256] =
577 #elif defined(FOUR_LR_TABLES)
578 static const u_int32_t fl_tab[4][256] =
581 #define r(p,q,r,s) w1(q)
584 #define r(p,q,r,s) w2(q)
587 #define r(p,q,r,s) w3(q)
594 #if defined(ONE_LR_TABLE)
595 static const u_int32_t il_tab[256] =
597 #elif defined(FOUR_LR_TABLES)
598 static const u_int32_t il_tab[4][256] =
614 #if defined(FIXED_TABLES) && (defined(ONE_IM_TABLE) || defined(FOUR_IM_TABLES))
617 r(00,00,00,00), r(0b,0d,09,0e), r(16,1a,12,1c), r(1d,17,1b,12),\
618 r(2c,34,24,38), r(27,39,2d,36), r(3a,2e,36,24), r(31,23,3f,2a),\
619 r(58,68,48,70), r(53,65,41,7e), r(4e,72,5a,6c), r(45,7f,53,62),\
620 r(74,5c,6c,48), r(7f,51,65,46), r(62,46,7e,54), r(69,4b,77,5a),\
621 r(b0,d0,90,e0), r(bb,dd,99,ee), r(a6,ca,82,fc), r(ad,c7,8b,f2),\
622 r(9c,e4,b4,d8), r(97,e9,bd,d6), r(8a,fe,a6,c4), r(81,f3,af,ca),\
623 r(e8,b8,d8,90), r(e3,b5,d1,9e), r(fe,a2,ca,8c), r(f5,af,c3,82),\
624 r(c4,8c,fc,a8), r(cf,81,f5,a6), r(d2,96,ee,b4), r(d9,9b,e7,ba),\
625 r(7b,bb,3b,db), r(70,b6,32,d5), r(6d,a1,29,c7), r(66,ac,20,c9),\
626 r(57,8f,1f,e3), r(5c,82,16,ed), r(41,95,0d,ff), r(4a,98,04,f1),\
627 r(23,d3,73,ab), r(28,de,7a,a5), r(35,c9,61,b7), r(3e,c4,68,b9),\
628 r(0f,e7,57,93), r(04,ea,5e,9d), r(19,fd,45,8f), r(12,f0,4c,81),\
629 r(cb,6b,ab,3b), r(c0,66,a2,35), r(dd,71,b9,27), r(d6,7c,b0,29),\
630 r(e7,5f,8f,03), r(ec,52,86,0d), r(f1,45,9d,1f), r(fa,48,94,11),\
631 r(93,03,e3,4b), r(98,0e,ea,45), r(85,19,f1,57), r(8e,14,f8,59),\
632 r(bf,37,c7,73), r(b4,3a,ce,7d), r(a9,2d,d5,6f), r(a2,20,dc,61),\
633 r(f6,6d,76,ad), r(fd,60,7f,a3), r(e0,77,64,b1), r(eb,7a,6d,bf),\
634 r(da,59,52,95), r(d1,54,5b,9b), r(cc,43,40,89), r(c7,4e,49,87),\
635 r(ae,05,3e,dd), r(a5,08,37,d3), r(b8,1f,2c,c1), r(b3,12,25,cf),\
636 r(82,31,1a,e5), r(89,3c,13,eb), r(94,2b,08,f9), r(9f,26,01,f7),\
637 r(46,bd,e6,4d), r(4d,b0,ef,43), r(50,a7,f4,51), r(5b,aa,fd,5f),\
638 r(6a,89,c2,75), r(61,84,cb,7b), r(7c,93,d0,69), r(77,9e,d9,67),\
639 r(1e,d5,ae,3d), r(15,d8,a7,33), r(08,cf,bc,21), r(03,c2,b5,2f),\
640 r(32,e1,8a,05), r(39,ec,83,0b), r(24,fb,98,19), r(2f,f6,91,17),\
641 r(8d,d6,4d,76), r(86,db,44,78), r(9b,cc,5f,6a), r(90,c1,56,64),\
642 r(a1,e2,69,4e), r(aa,ef,60,40), r(b7,f8,7b,52), r(bc,f5,72,5c),\
643 r(d5,be,05,06), r(de,b3,0c,08), r(c3,a4,17,1a), r(c8,a9,1e,14),\
644 r(f9,8a,21,3e), r(f2,87,28,30), r(ef,90,33,22), r(e4,9d,3a,2c),\
645 r(3d,06,dd,96), r(36,0b,d4,98), r(2b,1c,cf,8a), r(20,11,c6,84),\
646 r(11,32,f9,ae), r(1a,3f,f0,a0), r(07,28,eb,b2), r(0c,25,e2,bc),\
647 r(65,6e,95,e6), r(6e,63,9c,e8), r(73,74,87,fa), r(78,79,8e,f4),\
648 r(49,5a,b1,de), r(42,57,b8,d0), r(5f,40,a3,c2), r(54,4d,aa,cc),\
649 r(f7,da,ec,41), r(fc,d7,e5,4f), r(e1,c0,fe,5d), r(ea,cd,f7,53),\
650 r(db,ee,c8,79), r(d0,e3,c1,77), r(cd,f4,da,65), r(c6,f9,d3,6b),\
651 r(af,b2,a4,31), r(a4,bf,ad,3f), r(b9,a8,b6,2d), r(b2,a5,bf,23),\
652 r(83,86,80,09), r(88,8b,89,07), r(95,9c,92,15), r(9e,91,9b,1b),\
653 r(47,0a,7c,a1), r(4c,07,75,af), r(51,10,6e,bd), r(5a,1d,67,b3),\
654 r(6b,3e,58,99), r(60,33,51,97), r(7d,24,4a,85), r(76,29,43,8b),\
655 r(1f,62,34,d1), r(14,6f,3d,df), r(09,78,26,cd), r(02,75,2f,c3),\
656 r(33,56,10,e9), r(38,5b,19,e7), r(25,4c,02,f5), r(2e,41,0b,fb),\
657 r(8c,61,d7,9a), r(87,6c,de,94), r(9a,7b,c5,86), r(91,76,cc,88),\
658 r(a0,55,f3,a2), r(ab,58,fa,ac), r(b6,4f,e1,be), r(bd,42,e8,b0),\
659 r(d4,09,9f,ea), r(df,04,96,e4), r(c2,13,8d,f6), r(c9,1e,84,f8),\
660 r(f8,3d,bb,d2), r(f3,30,b2,dc), r(ee,27,a9,ce), r(e5,2a,a0,c0),\
661 r(3c,b1,47,7a), r(37,bc,4e,74), r(2a,ab,55,66), r(21,a6,5c,68),\
662 r(10,85,63,42), r(1b,88,6a,4c), r(06,9f,71,5e), r(0d,92,78,50),\
663 r(64,d9,0f,0a), r(6f,d4,06,04), r(72,c3,1d,16), r(79,ce,14,18),\
664 r(48,ed,2b,32), r(43,e0,22,3c), r(5e,f7,39,2e), r(55,fa,30,20),\
665 r(01,b7,9a,ec), r(0a,ba,93,e2), r(17,ad,88,f0), r(1c,a0,81,fe),\
666 r(2d,83,be,d4), r(26,8e,b7,da), r(3b,99,ac,c8), r(30,94,a5,c6),\
667 r(59,df,d2,9c), r(52,d2,db,92), r(4f,c5,c0,80), r(44,c8,c9,8e),\
668 r(75,eb,f6,a4), r(7e,e6,ff,aa), r(63,f1,e4,b8), r(68,fc,ed,b6),\
669 r(b1,67,0a,0c), r(ba,6a,03,02), r(a7,7d,18,10), r(ac,70,11,1e),\
670 r(9d,53,2e,34), r(96,5e,27,3a), r(8b,49,3c,28), r(80,44,35,26),\
671 r(e9,0f,42,7c), r(e2,02,4b,72), r(ff,15,50,60), r(f4,18,59,6e),\
672 r(c5,3b,66,44), r(ce,36,6f,4a), r(d3,21,74,58), r(d8,2c,7d,56),\
673 r(7a,0c,a1,37), r(71,01,a8,39), r(6c,16,b3,2b), r(67,1b,ba,25),\
674 r(56,38,85,0f), r(5d,35,8c,01), r(40,22,97,13), r(4b,2f,9e,1d),\
675 r(22,64,e9,47), r(29,69,e0,49), r(34,7e,fb,5b), r(3f,73,f2,55),\
676 r(0e,50,cd,7f), r(05,5d,c4,71), r(18,4a,df,63), r(13,47,d6,6d),\
677 r(ca,dc,31,d7), r(c1,d1,38,d9), r(dc,c6,23,cb), r(d7,cb,2a,c5),\
678 r(e6,e8,15,ef), r(ed,e5,1c,e1), r(f0,f2,07,f3), r(fb,ff,0e,fd),\
679 r(92,b4,79,a7), r(99,b9,70,a9), r(84,ae,6b,bb), r(8f,a3,62,b5),\
680 r(be,80,5d,9f), r(b5,8d,54,91), r(a8,9a,4f,83), r(a3,97,46,8d)
685 #if defined(ONE_IM_TABLE)
686 static const u_int32_t im_tab[256] =
688 #elif defined(FOUR_IM_TABLES)
689 static const u_int32_t im_tab[4][256] =
// Writable storage for the run-time generated tables; gen_tabs() fills these.
// NOTE(review): tab_gen presumably records that the tables have already been
// generated — confirm against the code that reads and sets it.
707 static int tab_gen = 0;
709 static unsigned char s_box[256]; // the S box
710 static unsigned char inv_s_box[256]; // the inverse S box
711 static u_int32_t rcon_tab[AES_RC_LENGTH]; // table of round constants
// forward (ft) and inverse (it) round tables: one 256-entry table or four
713 #if defined(ONE_TABLE)
714 static u_int32_t ft_tab[256];
715 static u_int32_t it_tab[256];
716 #elif defined(FOUR_TABLES)
717 static u_int32_t ft_tab[4][256];
718 static u_int32_t it_tab[4][256];
// forward (fl) and inverse (il) last-round tables
721 #if defined(ONE_LR_TABLE)
722 static u_int32_t fl_tab[256];
723 static u_int32_t il_tab[256];
724 #elif defined(FOUR_LR_TABLES)
725 static u_int32_t fl_tab[4][256];
726 static u_int32_t il_tab[4][256];
// table(s) behind inv_mcol() when ONE_IM_TABLE or FOUR_IM_TABLES is selected
729 #if defined(ONE_IM_TABLE)
730 static u_int32_t im_tab[256];
731 #elif defined(FOUR_IM_TABLES)
732 static u_int32_t im_tab[4][256];
735 // Generate the tables for the dynamic table option
737 #if !defined(FF_TABLES)
739 // It will generally be sensible to use tables to compute finite
740 // field multiplies and inverses but where memory is scarce this
741 // code might sometimes be better.
743 // return 2 ^ (n - 1) where n is the bit number of the highest bit
744 // set in x with x in the range 1 < x < 0x00000200. This form is
745 // used so that locals within FFinv can be bytes rather than words
747 static unsigned char hibit(const u_int32_t x)
748 { unsigned char r = (unsigned char)((x >> 1) | (x >> 2));
755 // return the inverse of the finite field element x
757 static unsigned char FFinv(const unsigned char x)
758 { unsigned char p1 = x, p2 = 0x1b, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
768 n2 /= n1; p2 ^= p1 * n2; v2 ^= v1 * n2; n2 = hibit(p2);
775 n1 /= n2; p1 ^= p2 * n1; v1 ^= v2 * n1; n1 = hibit(p1);
780 // define the finite field multiplies required for Rijndael
// FFmul02(x) doubles one byte in GF(2^8): shift the low seven bits left and
// XOR in 0x1b when the top bit was set. The other multipliers are built from
// it: 03 = 1^2, 09 = 1^8, 0b = 1^2^8, 0d = 1^4^8, 0e = 2^4^8.
// The argument is evaluated several times, so it must have no side effects.
782 #define FFmul02(x) ((((x) & 0x7f) << 1) ^ ((x) & 0x80 ? 0x1b : 0))
783 #define FFmul03(x) ((x) ^ FFmul02(x))
784 #define FFmul09(x) ((x) ^ FFmul02(FFmul02(FFmul02(x))))
785 #define FFmul0b(x) ((x) ^ FFmul02((x) ^ FFmul02(FFmul02(x))))
786 #define FFmul0d(x) ((x) ^ FFmul02(FFmul02((x) ^ FFmul02(x))))
787 #define FFmul0e(x) FFmul02((x) ^ FFmul02((x) ^ FFmul02(x)))
// Table-driven GF(2^8) inverse and constant multiplies. They expect arrays
// named pow (powers of the generator 0x03, stored twice so that an index of
// log[x] plus an offset never needs reducing modulo 255) and log (discrete
// logarithms to that base) to be in scope. Zero has no logarithm, so it is
// mapped to zero explicitly. Each added constant is the logarithm of the
// multiplier: 0x19 for {02}, 0x01 for {03}, 0xc7 for {09}, 0x68 for {0b},
// 0xee for {0d} and 0xdf for {0e}.
// The argument is parenthesized everywhere so that compound expressions
// (for example a conditional) expand correctly; it is evaluated twice.
#define FFinv(x) ((x) ? pow[255 - log[(x)]] : 0)

#define FFmul02(x) ((x) ? pow[log[(x)] + 0x19] : 0)
#define FFmul03(x) ((x) ? pow[log[(x)] + 0x01] : 0)
#define FFmul09(x) ((x) ? pow[log[(x)] + 0xc7] : 0)
#define FFmul0b(x) ((x) ? pow[log[(x)] + 0x68] : 0)
#define FFmul0d(x) ((x) ? pow[log[(x)] + 0xee] : 0)
#define FFmul0e(x) ((x) ? pow[log[(x)] + 0xdf] : 0)
802 // The forward and inverse affine transformations used in the S-box
// fwd_affine(x): forward S-box affine transform of the byte x; XORs x with
// its left rotations by 1..4 bits (within the byte) and then with 0x63.
// inv_affine(x): inverse transform; XORs the left rotations of x by 1, 3 and
// 6 bits and then 0x05.
// Both need a u_int32_t scratch variable named w in the caller's scope. The
// argument is parenthesized before the cast so that the cast applies to the
// whole of a compound argument rather than to its first operand only.
#define fwd_affine(x) \
    (w = (u_int32_t)(x), w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(unsigned char)(w^(w>>8)))

#define inv_affine(x) \
    (w = (u_int32_t)(x), w = (w<<1)^(w<<3)^(w<<6), 0x05^(unsigned char)(w^(w>>8)))
// Fill the run-time lookup tables: the round constants (rcon_tab), the
// forward and inverse S-boxes, and whichever of the fl/ft/il/it/im word
// tables the build options select.
810 static void gen_tabs(void)
813 #if defined(FF_TABLES)
// Each power is stored twice below (at i and at i + 255), so the FFmul
// macros can index pow[] with the sum of two logarithms without reducing
// that sum first.
815 unsigned char pow[512], log[256];
817 // log and power tables for GF(2^8) finite field with
818 // 0x011b as modular polynomial - the simplest primitive
819 // root is 0x03, used here to generate the tables
824 pow[i] = (unsigned char)w;
825 pow[i + 255] = (unsigned char)w;
826 log[w] = (unsigned char)i++;
// step to the next power of 0x03: w ^ (w doubled), with ff_poly folded in
// when the ff_hi bit of w is set
827 w ^= (w << 1) ^ (w & ff_hi ? ff_poly : 0);
// round constants: start at 1 and double in the field on each step
833 for(i = 0, w = 1; i < AES_RC_LENGTH; ++i)
835 rcon_tab[i] = bytes2word(w, 0, 0, 0);
836 w = (w << 1) ^ (w & ff_hi ? ff_poly : 0);
839 for(i = 0; i < 256; ++i)
// forward S-box entry: affine transform of the field inverse of i
842 s_box[i] = b = fwd_affine(FFinv((unsigned char)i));
// last-round tables hold the bare S-box byte packed into a word; the
// upr(w,n) variants fill tables 1..3
// (upr is defined elsewhere -- presumably a byte rotation, confirm)
844 w = bytes2word(b, 0, 0, 0);
845 #if defined(ONE_LR_TABLE)
847 #elif defined(FOUR_LR_TABLES)
849 fl_tab[1][i] = upr(w,1);
850 fl_tab[2][i] = upr(w,2);
851 fl_tab[3][i] = upr(w,3);
// normal-round forward tables pack the S-box byte multiplied by 02, 01, 01
// and 03
853 w = bytes2word(FFmul02(b), b, b, FFmul03(b));
854 #if defined(ONE_TABLE)
856 #elif defined(FOUR_TABLES)
858 ft_tab[1][i] = upr(w,1);
859 ft_tab[2][i] = upr(w,2);
860 ft_tab[3][i] = upr(w,3);
// inverse S-box entry: field inverse of the inverse affine transform of i
862 inv_s_box[i] = b = FFinv(inv_affine((unsigned char)i));
864 w = bytes2word(b, 0, 0, 0);
865 #if defined(ONE_LR_TABLE)
867 #elif defined(FOUR_LR_TABLES)
869 il_tab[1][i] = upr(w,1);
870 il_tab[2][i] = upr(w,2);
871 il_tab[3][i] = upr(w,3);
// normal-round inverse tables pack the inverse S-box byte multiplied by
// 0e, 09, 0d and 0b
873 w = bytes2word(FFmul0e(b), FFmul09(b), FFmul0d(b), FFmul0b(b));
874 #if defined(ONE_TABLE)
876 #elif defined(FOUR_TABLES)
878 it_tab[1][i] = upr(w,1);
879 it_tab[2][i] = upr(w,2);
880 it_tab[3][i] = upr(w,3);
// note that the im tables are indexed by b rather than by the loop index i
882 #if defined(ONE_IM_TABLE)
884 #elif defined(FOUR_IM_TABLES)
886 im_tab[1][b] = upr(w,1);
887 im_tab[2][b] = upr(w,2);
888 im_tab[3][b] = upr(w,3);
// The three lookup strategies below share one shape: for each row n = 0..3,
// vf(x,n,c) names the source word and rf(n,c) the byte position handed to
// bval, whose result indexes the table.
// (bval is defined elsewhere -- presumably byte extraction, confirm.)
//
// no_table: four lookups in a byte table (box), repacked with bytes2word.
896 #define no_table(x,box,vf,rf,c) bytes2word( \
897 box[bval(vf(x,0,c),rf(0,c))], \
898 box[bval(vf(x,1,c),rf(1,c))], \
899 box[bval(vf(x,2,c),rf(2,c))], \
900 box[bval(vf(x,3,c),rf(3,c))])
// one_table: four lookups in a single word table; the entries for rows 1..3
// are passed through op(entry, n) before all four are XORed together.
902 #define one_table(x,op,tab,vf,rf,c) \
903 ( tab[bval(vf(x,0,c),rf(0,c))] \
904 ^ op(tab[bval(vf(x,1,c),rf(1,c))],1) \
905 ^ op(tab[bval(vf(x,2,c),rf(2,c))],2) \
906 ^ op(tab[bval(vf(x,3,c),rf(3,c))],3))
// four_tables: one lookup in each of tab[0]..tab[3], XORed together.
908 #define four_tables(x,tab,vf,rf,c) \
909 ( tab[0][bval(vf(x,0,c),rf(0,c))] \
910 ^ tab[1][bval(vf(x,1,c),rf(1,c))] \
911 ^ tab[2][bval(vf(x,2,c),rf(2,c))] \
912 ^ tab[3][bval(vf(x,3,c),rf(3,c))])
// vf1: source-word selector that ignores row and column and yields x itself
914 #define vf1(x,r,c) (x)
// rf2: byte selector that offsets the row by the column, modulo 4
916 #define rf2(r,c) ((r-c)&3)
// ls_box(x,c): pass each byte of the word x through the forward S-box data,
// selecting bytes with rf2 so that c sets the byte offset. The variant
// follows the last-round table option; the last definition uses s_box
// directly.
918 #if defined(FOUR_LR_TABLES)
919 #define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c)
920 #elif defined(ONE_LR_TABLE)
921 #define ls_box(x,c) one_table(x,upr,fl_tab,vf1,rf2,c)
923 #define ls_box(x,c) no_table(x,s_box,vf1,rf2,c)
// inv_mcol(x): transform one word through the im tables when they are
// built. The last definition instead computes the result arithmetically
// with FFmulX (defined elsewhere) and needs the temporaries f2, f4, f8 and
// f9 to be declared in the calling scope.
926 #if defined(FOUR_IM_TABLES)
927 #define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0)
928 #elif defined(ONE_IM_TABLE)
929 #define inv_mcol(x) one_table(x,upr,im_tab,vf1,rf1,0)
931 #define inv_mcol(x) \
932 (f9 = (x),f2 = FFmulX(f9), f4 = FFmulX(f2), f8 = FFmulX(f4), f9 ^= f8, \
933 f2 ^= f4 ^ f8 ^ upr(f2 ^ f9,3) ^ upr(f4 ^ f9,2) ^ upr(f9,1))
936 // Subroutine to set the block size (if variable) in bytes, legal
937 // values being 16, 24 and 32.
// nc is the number of four-byte columns in a block: a compile-time constant
// when AES_BLOCK_SIZE is defined, otherwise read from the context named cx
// in the enclosing function.
939 #if defined(AES_BLOCK_SIZE)
940 #define nc (AES_BLOCK_SIZE / 4)
942 #define nc (cx->aes_Ncol)
944 void aes_set_blk(aes_context *cx, int n_bytes)
946 #if !defined(FIXED_TABLES)
// build the lookup tables on first use only; tab_gen records that it is done
947 if(!tab_gen) { gen_tabs(); tab_gen = 1; }
969 // Initialise the key schedule from the user supplied key. The key
970 // length may be given in bytes (16, 24 or 32) or, equivalently, in
971 // bits (128, 192 or 256). These correspond to Nk values of 4, 6 and
972 // 8 respectively.
// mx(t,f): store inv_mcol(*f) through t, then advance both pointers.
//          f is stepped in a separate comma operand because inv_mcol may
//          expand its argument more than once.
// cp(t,f): copy one word from *f to *t and advance both pointers.
// Arguments and results are parenthesized so the macros stay correct when
// they are given expression arguments or used inside larger expressions.
#define mx(t,f) (*(t)++ = inv_mcol(*(f)), (f)++)
#define cp(t,f) (*(t)++ = *(f)++)
// cpy(d,s) and mix(d,s) process one block's worth of key-schedule words
// with cp and mx respectively. For a fixed AES_BLOCK_SIZE the word count is
// unrolled in full.
// NOTE(review): several continuation lines of these macros are not visible
// in this excerpt -- check against the complete file before editing.
977 #if AES_BLOCK_SIZE == 16
978 #define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s)
979 #define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s)
980 #elif AES_BLOCK_SIZE == 24
981 #define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
983 #define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
985 #elif AES_BLOCK_SIZE == 32
986 #define cpy(d,s) cp(d,s); cp(d,s); cp(d,s); cp(d,s); \
987 cp(d,s); cp(d,s); cp(d,s); cp(d,s)
988 #define mix(d,s) mx(d,s); mx(d,s); mx(d,s); mx(d,s); \
989 mx(d,s); mx(d,s); mx(d,s); mx(d,s)
// Variable block size: the case labels 8, 6 and 4 fall through, so a larger
// count runs its extra pair of words and then continues into the smaller
// cases.
994 { case 8: cp(d,s); cp(d,s); \
995 case 6: cp(d,s); cp(d,s); \
996 case 4: cp(d,s); cp(d,s); \
1002 { case 8: mx(d,s); mx(d,s); \
1003 case 6: mx(d,s); mx(d,s); \
1004 case 4: mx(d,s); mx(d,s); \
1010 #ifdef CONFIG_IXP4XX_CRYPTO
// Key schedule set-up. With CONFIG_IXP4XX_CRYPTO defined this body is given
// the name ike_aes_set_key; the second signature below carries the name
// aes_set_key (presumably the #else branch, which is not visible here).
//   cx       context that receives aes_Nkey, aes_Nrnd and the expanded keys
//   in_key   raw key bytes
//   n_bytes  key length in bytes (16/24/32) or in bits (128/192/256)
//   f        NOTE(review): its use is not visible in this excerpt -- confirm
1011 void ike_aes_set_key(aes_context *cx, const unsigned char in_key[], int n_bytes, const int f)
1013 void aes_set_key(aes_context *cx, const unsigned char in_key[], int n_bytes, const int f)
1014 #endif /* CONFIG_IXP4XX_CRYPTO */
1015 { u_int32_t *kf, *kt, rci;
1017 #if !defined(FIXED_TABLES)
// build the lookup tables on first use only
1018 if(!tab_gen) { gen_tabs(); tab_gen = 1; }
// the length is accepted either as a byte count or as a bit count
1022 case 32: /* bytes */
1023 case 256: /* bits */
1026 case 24: /* bytes */
1027 case 192: /* bits */
1030 case 16: /* bytes */
1031 case 128: /* bits */
// number of rounds is the larger of the key and block column counts, plus 6
1037 cx->aes_Nrnd = (cx->aes_Nkey > nc ? cx->aes_Nkey : nc) + 6;
// the first four key words are always loaded; longer keys load the rest in
// their own case below
1039 cx->aes_e_key[0] = const_word_in(in_key );
1040 cx->aes_e_key[1] = const_word_in(in_key + 4);
1041 cx->aes_e_key[2] = const_word_in(in_key + 8);
1042 cx->aes_e_key[3] = const_word_in(in_key + 12);
// kt sits aes_Nkey words before the end of the nc * (Nrnd + 1) word schedule
// (presumably the bound for the expansion loops, which are not visible here)
1045 kt = kf + nc * (cx->aes_Nrnd + 1) - cx->aes_Nkey;
1048 switch(cx->aes_Nkey)
// 4-word key: the first new word mixes in ls_box of the previous word and a
// round constant; the others chain by XOR
1051 { kf[4] = kf[0] ^ ls_box(kf[3],3) ^ rcon_tab[rci++];
1052 kf[5] = kf[1] ^ kf[4];
1053 kf[6] = kf[2] ^ kf[5];
1054 kf[7] = kf[3] ^ kf[6];
// 6-word key: same pattern over six words
1060 case 6: cx->aes_e_key[4] = const_word_in(in_key + 16);
1061 cx->aes_e_key[5] = const_word_in(in_key + 20);
1063 { kf[ 6] = kf[0] ^ ls_box(kf[5],3) ^ rcon_tab[rci++];
1064 kf[ 7] = kf[1] ^ kf[ 6];
1065 kf[ 8] = kf[2] ^ kf[ 7];
1066 kf[ 9] = kf[3] ^ kf[ 8];
1067 kf[10] = kf[4] ^ kf[ 9];
1068 kf[11] = kf[5] ^ kf[10];
// 8-word key: as above, plus a second ls_box (offset 0, no round constant)
// applied half way through each group of eight
1074 case 8: cx->aes_e_key[4] = const_word_in(in_key + 16);
1075 cx->aes_e_key[5] = const_word_in(in_key + 20);
1076 cx->aes_e_key[6] = const_word_in(in_key + 24);
1077 cx->aes_e_key[7] = const_word_in(in_key + 28);
1079 { kf[ 8] = kf[0] ^ ls_box(kf[7],3) ^ rcon_tab[rci++];
1080 kf[ 9] = kf[1] ^ kf[ 8];
1081 kf[10] = kf[2] ^ kf[ 9];
1082 kf[11] = kf[3] ^ kf[10];
1083 kf[12] = kf[4] ^ ls_box(kf[11],0);
1084 kf[13] = kf[5] ^ kf[12];
1085 kf[14] = kf[6] ^ kf[13];
1086 kf[15] = kf[7] ^ kf[14];
// decryption schedule: kt starts at the last round-key slot of aes_d_key
// and steps backwards (cpy advances it by nc, then 2 * nc is subtracted)
1096 kt = cx->aes_d_key + nc * cx->aes_Nrnd;
1099 cpy(kt, kf); kt -= 2 * nc;
1101 for(i = 1; i < cx->aes_Nrnd; ++i)
1103 #if defined(ONE_TABLE) || defined(FOUR_TABLES)
1104 #if !defined(ONE_IM_TABLE) && !defined(FOUR_IM_TABLES)
// scratch words used by the arithmetic form of inv_mcol
1105 u_int32_t f2, f4, f8, f9;
// CONFIG_IXP4XX_CRYPTO variant of aes_set_key: the visible lines set the
// round count and copy the raw key into aes_e_key; no key expansion appears
// in them.
1118 #ifdef CONFIG_IXP4XX_CRYPTO
1119 void aes_set_key(aes_context *cx, const unsigned char in_key[], int n_bytes, const int f)
1120 { u_int32_t *kf, *kt, rci;
1122 #if !defined(FIXED_TABLES)
// build the lookup tables on first use only
1123 if(!tab_gen) { gen_tabs(); tab_gen = 1; }
// the length is accepted either as a byte count or as a bit count
1127 case 32: /* bytes */
1128 case 256: /* bits */
1131 case 24: /* bytes */
1132 case 192: /* bits */
1135 case 16: /* bytes */
1136 case 128: /* bits */
1141 cx->aes_Nrnd = (cx->aes_Nkey > nc ? cx->aes_Nkey : nc) + 6;
// NOTE(review): n_bytes may hold a bit count (128/192/256 are accepted by
// the switch above). Unless it is normalised in lines not visible here,
// this copies far more than the key; 4 * cx->aes_Nkey looks like the
// intended length -- confirm.
1142 memcpy (cx->aes_e_key, in_key, n_bytes);
1145 #endif /* CONFIG_IXP4XX_CRYPTO */
1146 // y = output word, x = input word, r = row, c = column
1147 // for r = 0, 1, 2 and 3 = column accessed for row r
1155 // I am grateful to Frank Yellin for the following constructions
1156 // which, given the column (c) of the output state variable that
1157 // is being computed, return the input state variables which are
1158 // needed for each row (r) of the state
1160 // For the fixed block size options, compilers reduce these two
1161 // expressions to fixed variable references. For variable block
1162 // size code conditional clauses will sometimes be returned
// Reading the visible clauses: each one tests the output column c, then the
// block column count nc (4, 6 or 8), and yields the matching input word
// s(x,n).
// NOTE(review): many continuation lines of fwd_var and inv_var, including
// the row tests and the places where `unused` appears, are not visible in
// this excerpt.
1164 #define unused 77 // Sunset Strip
1166 #define fwd_var(x,r,c) \
1180 : c==3 ? nc==4 ? s(x,0) : s(x,4) \
1182 : c==5 ? nc==8 ? s(x,6) : s(x,0) \
1186 ( c==0 ? nc==8 ? s(x,3) : s(x,2) \
1187 : c==1 ? nc==8 ? s(x,4) : s(x,3) \
1188 : c==2 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
1189 : c==3 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
1190 : c==4 ? nc==8 ? s(x,7) : s(x,0) \
1191 : c==5 ? nc==8 ? s(x,0) : s(x,1) \
1195 ( c==0 ? nc==8 ? s(x,4) : s(x,3) \
1196 : c==1 ? nc==4 ? s(x,0) : nc==8 ? s(x,5) : s(x,4) \
1197 : c==2 ? nc==4 ? s(x,1) : nc==8 ? s(x,6) : s(x,5) \
1198 : c==3 ? nc==4 ? s(x,2) : nc==8 ? s(x,7) : s(x,0) \
1199 : c==4 ? nc==8 ? s(x,0) : s(x,1) \
1200 : c==5 ? nc==8 ? s(x,1) : s(x,2) \
1204 #define inv_var(x,r,c) \
1215 ( c==0 ? nc==4 ? s(x,3) : nc==8 ? s(x,7) : s(x,5) \
1224 ( c==0 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
1225 : c==1 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
1226 : c==2 ? nc==8 ? s(x,7) : s(x,0) \
1227 : c==3 ? nc==8 ? s(x,0) : s(x,1) \
1228 : c==4 ? nc==8 ? s(x,1) : s(x,2) \
1229 : c==5 ? nc==8 ? s(x,2) : s(x,3) \
1233 ( c==0 ? nc==4 ? s(x,1) : nc==8 ? s(x,4) : s(x,3) \
1234 : c==1 ? nc==4 ? s(x,2) : nc==8 ? s(x,5) : s(x,4) \
1235 : c==2 ? nc==4 ? s(x,3) : nc==8 ? s(x,6) : s(x,5) \
1236 : c==3 ? nc==8 ? s(x,7) : s(x,0) \
1237 : c==4 ? nc==8 ? s(x,0) : s(x,1) \
1238 : c==5 ? nc==8 ? s(x,1) : s(x,2) \
// si(y,x,k,c): load column c of the state y from the byte buffer x (four
//              bytes per column) and XOR it with key word k[c].
// so(y,x,c):   store column c of the state x into the byte buffer y.
// The column c is also handed to s() unchanged, so it must stay a plain
// token there. Everywhere else the arguments are parenthesized so that
// pointer or index expressions are combined as the caller wrote them.
#define si(y,x,k,c) (s(y,c) = const_word_in((x) + 4 * (c)) ^ (k)[c])
#define so(y,x,c) word_out((y) + 4 * (c), s(x,c))
// Normal round for one output column c: y[c] = round key word XOR the
// table lookup over the input words chosen by fwd_var / inv_var.
1245 #if defined(FOUR_TABLES)
1246 #define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,ft_tab,fwd_var,rf1,c)
1247 #define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,it_tab,inv_var,rf1,c)
1248 #elif defined(ONE_TABLE)
1249 #define fwd_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,ft_tab,fwd_var,rf1,c)
1250 #define inv_rnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,upr,it_tab,inv_var,rf1,c)
// S-box-only forms: the forward round XORs the key after fwd_mcol, whereas
// the inverse round XORs the key before inv_mcol is applied.
1252 #define fwd_rnd(y,x,k,c) s(y,c) = fwd_mcol(no_table(x,s_box,fwd_var,rf1,c)) ^ (k)[c]
1253 #define inv_rnd(y,x,k,c) s(y,c) = inv_mcol(no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c])
// Last round for one output column: same shape, but using the fl/il tables
// or the bare S-boxes, with no column mixing step. The single-table form
// here passes ups where the normal round passes upr.
1256 #if defined(FOUR_LR_TABLES)
1257 #define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,fl_tab,fwd_var,rf1,c)
1258 #define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ four_tables(x,il_tab,inv_var,rf1,c)
1259 #elif defined(ONE_LR_TABLE)
1260 #define fwd_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,fl_tab,fwd_var,rf1,c)
1261 #define inv_lrnd(y,x,k,c) s(y,c)= (k)[c] ^ one_table(x,ups,il_tab,inv_var,rf1,c)
1263 #define fwd_lrnd(y,x,k,c) s(y,c) = no_table(x,s_box,fwd_var,rf1,c) ^ (k)[c]
1264 #define inv_lrnd(y,x,k,c) s(y,c) = no_table(x,inv_s_box,inv_var,rf1,c) ^ (k)[c]
// Per-block-size helpers used by aes_encrypt and aes_decrypt:
//   locals(y,x)      declarators for the two state buffers
//   l_copy(y,x)      copy every state word from x to y
//   state_in(y,x,k)  load the state from bytes x, XORing in key words k
//   state_out(y,x)   store the state x to bytes y
//   round(rm,y,x,k)  apply the per-column round macro rm to every column
// NOTE(review): some lines of these definitions are not visible in this
// excerpt.
1267 #if AES_BLOCK_SIZE == 16
// state kept in two arrays
1270 #define locals(y,x) x[4],y[4]
// state kept in individually named words
1272 #define locals(y,x) x##0,x##1,x##2,x##3,y##0,y##1,y##2,y##3
1273 // the following defines prevent the compiler requiring the declaration
1274 // of generated but unused variables in the fwd_var and inv_var macros
1284 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
1285 s(y,2) = s(x,2); s(y,3) = s(x,3);
1286 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3)
1287 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
1288 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
// six-column (24 byte) block
1290 #elif AES_BLOCK_SIZE == 24
1293 #define locals(y,x) x[6],y[6]
1295 #define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5, \
1296 y##0,y##1,y##2,y##3,y##4,y##5
1302 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
1303 s(y,2) = s(x,2); s(y,3) = s(x,3); \
1304 s(y,4) = s(x,4); s(y,5) = s(x,5);
1305 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); \
1306 si(y,x,k,3); si(y,x,k,4); si(y,x,k,5)
1307 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); \
1308 so(y,x,3); so(y,x,4); so(y,x,5)
1309 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); \
1310 rm(y,x,k,3); rm(y,x,k,4); rm(y,x,k,5)
// eight-word state buffers
1314 #define locals(y,x) x[8],y[8]
1316 #define locals(y,x) x##0,x##1,x##2,x##3,x##4,x##5,x##6,x##7, \
1317 y##0,y##1,y##2,y##3,y##4,y##5,y##6,y##7
1319 #define l_copy(y, x) s(y,0) = s(x,0); s(y,1) = s(x,1); \
1320 s(y,2) = s(x,2); s(y,3) = s(x,3); \
1321 s(y,4) = s(x,4); s(y,5) = s(x,5); \
1322 s(y,6) = s(x,6); s(y,7) = s(x,7);
// fixed eight-column (32 byte) block: every column is unrolled
1324 #if AES_BLOCK_SIZE == 32
1326 #define state_in(y,x,k) si(y,x,k,0); si(y,x,k,1); si(y,x,k,2); si(y,x,k,3); \
1327 si(y,x,k,4); si(y,x,k,5); si(y,x,k,6); si(y,x,k,7)
1328 #define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3); \
1329 so(y,x,4); so(y,x,5); so(y,x,6); so(y,x,7)
1330 #define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3); \
1331 rm(y,x,k,4); rm(y,x,k,5); rm(y,x,k,6); rm(y,x,k,7)
// Variable block size: in state_in, state_out and the second round form the
// case labels 8, 6 and 4 fall through, so larger blocks handle their extra
// columns first and then share the code for columns 3..0. The
// FAST_VARIABLE round form instead lists every column under each label.
1334 #define state_in(y,x,k) \
1336 { case 8: si(y,x,k,7); si(y,x,k,6); \
1337 case 6: si(y,x,k,5); si(y,x,k,4); \
1338 case 4: si(y,x,k,3); si(y,x,k,2); \
1339 si(y,x,k,1); si(y,x,k,0); \
1342 #define state_out(y,x) \
1344 { case 8: so(y,x,7); so(y,x,6); \
1345 case 6: so(y,x,5); so(y,x,4); \
1346 case 4: so(y,x,3); so(y,x,2); \
1347 so(y,x,1); so(y,x,0); \
1350 #if defined(FAST_VARIABLE)
1352 #define round(rm,y,x,k) \
1354 { case 8: rm(y,x,k,7); rm(y,x,k,6); \
1355 rm(y,x,k,5); rm(y,x,k,4); \
1356 rm(y,x,k,3); rm(y,x,k,2); \
1357 rm(y,x,k,1); rm(y,x,k,0); \
1359 case 6: rm(y,x,k,5); rm(y,x,k,4); \
1360 rm(y,x,k,3); rm(y,x,k,2); \
1361 rm(y,x,k,1); rm(y,x,k,0); \
1363 case 4: rm(y,x,k,3); rm(y,x,k,2); \
1364 rm(y,x,k,1); rm(y,x,k,0); \
1369 #define round(rm,y,x,k) \
1371 { case 8: rm(y,x,k,7); rm(y,x,k,6); \
1372 case 6: rm(y,x,k,5); rm(y,x,k,4); \
1373 case 4: rm(y,x,k,3); rm(y,x,k,2); \
1374 rm(y,x,k,1); rm(y,x,k,0); \
1382 void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
// Encrypt one block.
//   cx       context whose aes_e_key schedule and aes_Nrnd round count are used
//   in_blk   input block bytes
//   out_blk  receives the output block bytes
// The state alternates between the two buffers b0 and b1 from round to round.
1383 { u_int32_t locals(b0, b1);
1384 const u_int32_t *kp = cx->aes_e_key;
1386 #if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
// load the input and XOR in the first round key, then step past that key
1390 state_in(b0, in_blk, kp); kp += nc;
// fully unrolled form: cases 14 and 12 each run two extra rounds and then
// continue into the ten rounds shared with case 10
1394 switch(cx->aes_Nrnd)
1396 case 14: round(fwd_rnd, b1, b0, kp );
1397 round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1398 case 12: round(fwd_rnd, b1, b0, kp );
1399 round(fwd_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1400 case 10: round(fwd_rnd, b1, b0, kp );
1401 round(fwd_rnd, b0, b1, kp + nc);
1402 round(fwd_rnd, b1, b0, kp + 2 * nc);
1403 round(fwd_rnd, b0, b1, kp + 3 * nc);
1404 round(fwd_rnd, b1, b0, kp + 4 * nc);
1405 round(fwd_rnd, b0, b1, kp + 5 * nc);
1406 round(fwd_rnd, b1, b0, kp + 6 * nc);
1407 round(fwd_rnd, b0, b1, kp + 7 * nc);
1408 round(fwd_rnd, b1, b0, kp + 8 * nc);
// the final round uses the last-round macro
1409 round(fwd_lrnd, b0, b1, kp + 9 * nc);
// partially unrolled form: two rounds per iteration, with the final pair
// (one normal round and the last round) done after the loop
1412 #elif defined(PARTIAL_UNROLL)
1415 for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd)
1417 round(fwd_rnd, b1, b0, kp);
1418 round(fwd_rnd, b0, b1, kp + nc); kp += 2 * nc;
1421 round(fwd_rnd, b1, b0, kp);
1422 round(fwd_lrnd, b0, b1, kp + nc);
// rolled form: one round per iteration, copying b1 back into b0 each time
1427 for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd)
1429 round(fwd_rnd, b1, b0, kp);
1430 l_copy(b0, b1); kp += nc;
1433 round(fwd_lrnd, b0, b1, kp);
// write the final state out as bytes
1437 state_out(out_blk, b0);
// Decrypt one block.
//   cx       context whose aes_d_key schedule and aes_Nrnd round count are used
//   in_blk   input block bytes
//   out_blk  receives the output block bytes
// The structure mirrors aes_encrypt, using the inverse round macros.
1440 void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
1441 { u_int32_t locals(b0, b1);
1442 const u_int32_t *kp = cx->aes_d_key;
1444 #if !defined(ONE_TABLE) && !defined(FOUR_TABLES)
// scratch words used by the arithmetic form of inv_mcol
1445 u_int32_t f2, f4, f8, f9;
// load the input and XOR in the first round key, then step past that key
1448 state_in(b0, in_blk, kp); kp += nc;
// fully unrolled form: cases 14 and 12 each run two extra rounds and then
// continue into the ten rounds shared with case 10
1452 switch(cx->aes_Nrnd)
1454 case 14: round(inv_rnd, b1, b0, kp );
1455 round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1456 case 12: round(inv_rnd, b1, b0, kp );
1457 round(inv_rnd, b0, b1, kp + nc ); kp += 2 * nc;
1458 case 10: round(inv_rnd, b1, b0, kp );
1459 round(inv_rnd, b0, b1, kp + nc);
1460 round(inv_rnd, b1, b0, kp + 2 * nc);
1461 round(inv_rnd, b0, b1, kp + 3 * nc);
1462 round(inv_rnd, b1, b0, kp + 4 * nc);
1463 round(inv_rnd, b0, b1, kp + 5 * nc);
1464 round(inv_rnd, b1, b0, kp + 6 * nc);
1465 round(inv_rnd, b0, b1, kp + 7 * nc);
1466 round(inv_rnd, b1, b0, kp + 8 * nc);
// the final round uses the last-round macro
1467 round(inv_lrnd, b0, b1, kp + 9 * nc);
// partially unrolled form: two rounds per iteration, with the final pair
// (one normal round and the last round) done after the loop
1470 #elif defined(PARTIAL_UNROLL)
1473 for(rnd = 0; rnd < (cx->aes_Nrnd >> 1) - 1; ++rnd)
1475 round(inv_rnd, b1, b0, kp);
1476 round(inv_rnd, b0, b1, kp + nc); kp += 2 * nc;
1479 round(inv_rnd, b1, b0, kp);
1480 round(inv_lrnd, b0, b1, kp + nc);
// rolled form: one round per iteration, copying b1 back into b0 each time
1485 for(rnd = 0; rnd < cx->aes_Nrnd - 1; ++rnd)
1487 round(inv_rnd, b1, b0, kp);
1488 l_copy(b0, b1); kp += nc;
1491 round(inv_lrnd, b0, b1, kp);
// write the final state out as bytes
1495 state_out(out_blk, b0);