2 ---------------------------------------------------------------------------
3 Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
7 The redistribution and use of this software (with or without changes)
8 is allowed without the payment of fees or royalties provided that:
10 1. source code distributions include the above copyright notice, this
11 list of conditions and the following disclaimer;
13 2. binary distributions include the above copyright notice, this list
14 of conditions and the following disclaimer in their documentation;
16 3. the name of the copyright holder is not used to endorse products
17 built using this software without specific written permission.
21 This software is provided 'as is' with no explicit or implied warranties
22 in respect of its properties, including, but not limited to, correctness
23 and/or fitness for purpose.
24 ---------------------------------------------------------------------------
27 This is an AES implementation that uses only 8-bit byte operations on the
28 cipher state (there are options to use 32-bit types if available).
30 The combination of mix columns and byte substitution used here is based on
31 that developed by Karl Malbrain. His contribution is acknowledged.
/* NOTE(review): this is a partial numbered listing -- the left-hand
 * numbers are the original source line numbers; intervening lines
 * (the matching #endif's, blanks, etc.) are missing from this extract.
 * Code text below is kept byte-identical. */
34 /* define if you have a fast memcpy function on your system */
39 # if defined( _MSC_VER )
/* MSVC: expand memcpy inline for the small fixed-size block copies below */
41 # pragma intrinsic( memcpy )
48 /* define if you have fast 32-bit types on your system */
50 # define HAVE_UINT_32T
53 /* define if you don't want any tables */
58 /* On Intel Core 2 duo VERSION_1 is faster */
60 /* alternative versions (test for performance on your system) */
67 #if defined( HAVE_UINT_32T )
/* NOTE(review): 'unsigned long' is 64 bits on LP64 platforms; the 32-bit
 * copy/xor paths below assume a true 32-bit type -- consider uint32_t
 * from <stdint.h>.  TODO confirm target ABI. */
68 typedef unsigned long uint_32t;
71 /* functions for finite field multiplication in the AES Galois field */
/* GF(2^8) doubling ("xtime"): shift left and conditionally reduce.
 * WPOLY is presumably the AES field polynomial 0x011b; its #define is on
 * a line missing from this extract -- confirm. */
78 #define f2(x) ((x << 1) ^ (((x >> 7) & 1) * WPOLY))
/* multiply by 4: two shifts, one reduction term per bit shifted out */
79 #define f4(x) ((x << 2) ^ (((x >> 6) & 1) * WPOLY) ^ (((x >> 6) & 2) * WPOLY))
/* multiply by 8: three shifts, three reduction terms */
80 #define f8(x) ((x << 3) ^ (((x >> 5) & 1) * WPOLY) ^ (((x >> 5) & 2) * WPOLY) \
81 ^ (((x >> 5) & 4) * WPOLY))
/* halve in the field (inverse of f2); DPOLY's #define is not visible here */
82 #define d2(x) (((x) >> 1) ^ ((x) & 1 ? DPOLY : 0))
/* remaining multipliers as sums of powers of two:
 * 3 = 2+1, 9 = 8+1, 0x0b = 8+2+1, 0x0d = 8+4+1, 0x0e = 8+4+2 */
84 #define f3(x) (f2(x) ^ x)
85 #define f9(x) (f8(x) ^ x)
86 #define fb(x) (f8(x) ^ f2(x) ^ x)
87 #define fd(x) (f8(x) ^ f4(x) ^ x)
88 #define fe(x) (f8(x) ^ f4(x) ^ f2(x))
/* NOTE(review): the argument x is not parenthesised inside f2/f4/f8, so
 * these macros are only safe with simple variable arguments (as used by
 * the table builders below). */
90 #if defined( USE_TABLES )
/* AES forward S-box, row by row; each entry is wrapped in the macro w()
 * so the same data can instantiate both the plain s-box table and the
 * GF-multiplied variants below (w = f1, f2, f3, ...). */
92 #define sb_data(w) { /* S Box data values */ \
93 w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
94 w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
95 w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
96 w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
97 w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
98 w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
99 w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
100 w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
101 w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
102 w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
103 w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
104 w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
105 w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
106 w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
107 w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
108 w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
109 w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
110 w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
111 w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
112 w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
113 w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
114 w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
115 w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
116 w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
117 w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
118 w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
119 w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
120 w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
121 w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
122 w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
123 w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
124 w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
/* AES inverse S-box, same w()-wrapper scheme as sb_data above */
126 #define isb_data(w) { /* inverse S Box data values */ \
127 w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38),\
128 w(0xbf), w(0x40), w(0xa3), w(0x9e), w(0x81), w(0xf3), w(0xd7), w(0xfb),\
129 w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87),\
130 w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb),\
131 w(0x54), w(0x7b), w(0x94), w(0x32), w(0xa6), w(0xc2), w(0x23), w(0x3d),\
132 w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e),\
133 w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2),\
134 w(0x76), w(0x5b), w(0xa2), w(0x49), w(0x6d), w(0x8b), w(0xd1), w(0x25),\
135 w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16),\
136 w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92),\
137 w(0x6c), w(0x70), w(0x48), w(0x50), w(0xfd), w(0xed), w(0xb9), w(0xda),\
138 w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84),\
139 w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a),\
140 w(0xf7), w(0xe4), w(0x58), w(0x05), w(0xb8), w(0xb3), w(0x45), w(0x06),\
141 w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02),\
142 w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b),\
143 w(0x3a), w(0x91), w(0x11), w(0x41), w(0x4f), w(0x67), w(0xdc), w(0xea),\
144 w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73),\
145 w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85),\
146 w(0xe2), w(0xf9), w(0x37), w(0xe8), w(0x1c), w(0x75), w(0xdf), w(0x6e),\
147 w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89),\
148 w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b),\
149 w(0xfc), w(0x56), w(0x3e), w(0x4b), w(0xc6), w(0xd2), w(0x79), w(0x20),\
150 w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4),\
151 w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31),\
152 w(0xb1), w(0x12), w(0x10), w(0x59), w(0x27), w(0x80), w(0xec), w(0x5f),\
153 w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d),\
154 w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef),\
155 w(0xa0), w(0xe0), w(0x3b), w(0x4d), w(0xae), w(0x2a), w(0xf5), w(0xb0),\
156 w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61),\
157 w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26),\
158 w(0xe1), w(0x69), w(0x14), w(0x63), w(0x55), w(0x21), w(0x0c), w(0x7d) }
/* identity table 0x00..0xff; with w = f9/fb/fd/fe it expands into the
 * GF multiply-by-constant tables used in the inverse mix-columns step */
160 #define mm_data(w) { /* basic data for forming finite field tables */ \
161 w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07),\
162 w(0x08), w(0x09), w(0x0a), w(0x0b), w(0x0c), w(0x0d), w(0x0e), w(0x0f),\
163 w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17),\
164 w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f),\
165 w(0x20), w(0x21), w(0x22), w(0x23), w(0x24), w(0x25), w(0x26), w(0x27),\
166 w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f),\
167 w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37),\
168 w(0x38), w(0x39), w(0x3a), w(0x3b), w(0x3c), w(0x3d), w(0x3e), w(0x3f),\
169 w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47),\
170 w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f),\
171 w(0x50), w(0x51), w(0x52), w(0x53), w(0x54), w(0x55), w(0x56), w(0x57),\
172 w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f),\
173 w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67),\
174 w(0x68), w(0x69), w(0x6a), w(0x6b), w(0x6c), w(0x6d), w(0x6e), w(0x6f),\
175 w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77),\
176 w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f),\
177 w(0x80), w(0x81), w(0x82), w(0x83), w(0x84), w(0x85), w(0x86), w(0x87),\
178 w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f),\
179 w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97),\
180 w(0x98), w(0x99), w(0x9a), w(0x9b), w(0x9c), w(0x9d), w(0x9e), w(0x9f),\
181 w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7),\
182 w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf),\
183 w(0xb0), w(0xb1), w(0xb2), w(0xb3), w(0xb4), w(0xb5), w(0xb6), w(0xb7),\
184 w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf),\
185 w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7),\
186 w(0xc8), w(0xc9), w(0xca), w(0xcb), w(0xcc), w(0xcd), w(0xce), w(0xcf),\
187 w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7),\
188 w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf),\
189 w(0xe0), w(0xe1), w(0xe2), w(0xe3), w(0xe4), w(0xe5), w(0xe6), w(0xe7),\
190 w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef),\
191 w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7),\
192 w(0xf8), w(0xf9), w(0xfa), w(0xfb), w(0xfc), w(0xfd), w(0xfe), w(0xff) }
/* instantiate the lookup tables; f1 is presumably the identity macro
 * (its #define is on a line missing from this extract -- confirm) */
194 static const uint_8t sbox[256] = sb_data(f1);
195 static const uint_8t isbox[256] = isb_data(f1);
/* S-box entries pre-multiplied by 2 and 3 (forward mix-columns) */
197 static const uint_8t gfm2_sbox[256] = sb_data(f2);
198 static const uint_8t gfm3_sbox[256] = sb_data(f3);
/* plain GF multiply-by-9/0x0b/0x0d/0x0e tables (inverse mix-columns) */
200 static const uint_8t gfmul_9[256] = mm_data(f9);
201 static const uint_8t gfmul_b[256] = mm_data(fb);
202 static const uint_8t gfmul_d[256] = mm_data(fd);
203 static const uint_8t gfmul_e[256] = mm_data(fe);
/* table-lookup forms of the box/multiply primitives (USE_TABLES path) */
205 #define s_box(x) sbox[(x)]
206 #define is_box(x) isbox[(x)]
207 #define gfm2_sb(x) gfm2_sbox[(x)]
208 #define gfm3_sb(x) gfm3_sbox[(x)]
209 #define gfm_9(x) gfmul_9[(x)]
210 #define gfm_b(x) gfmul_b[(x)]
211 #define gfm_d(x) gfmul_d[(x)]
212 #define gfm_e(x) gfmul_e[(x)]
216 /* this is the high bit of x right shifted by 1 */
217 /* position. Since the starting polynomial has */
218 /* 9 bits (0x11b), this right shift keeps the */
219 /* values of all top bits within a byte */
/* isolate the highest set bit of x, pre-shifted right by one; the
 * remainder of the body (original lines 223-227, which smear the bit
 * downward and return) is missing from this extract. */
221 static uint_8t hibit(const uint_8t x)
222 { uint_8t r = (uint_8t)((x >> 1) | (x >> 2));
229 /* return the inverse of the finite field element x */
/* binary extended Euclidean algorithm over GF(2^8).  BPOLY is presumably
 * the reduced field polynomial (#define not visible in this extract).
 * Several lines of the outer loop and the swap/termination logic
 * (original lines 233-239, 241, 246-249, 251 onward) are missing here. */
231 static uint_8t gf_inv(const uint_8t x)
232 { uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;
240 while(n2 >= n1) /* divide polynomial p2 by p1 */
242 n2 /= n1; /* shift smaller polynomial left */
243 p2 ^= (p1 * n2) & 0xff; /* and remove from larger one */
244 v2 ^= (v1 * n2); /* shift accumulated value and */
245 n2 = hibit(p2); /* add into result */
250 if(n2) /* repeat with values swapped */
263 /* The forward and inverse affine transformations used in the S-box */
/* forward affine map: bit-matrix multiply folded into shifts plus the
 * 0x63 constant.  The declaration of w, the #else, the #endif and the
 * closing brace are on lines missing from this extract. */
264 uint_8t fwd_affine(const uint_8t x)
266 #if defined( HAVE_UINT_32T )
268 w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
269 return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
/* byte-only fallback: the right shifts bring the wrapped-around bits
 * back into the low byte */
271 return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4)
272 ^ (x >> 7) ^ (x >> 6) ^ (x >> 5) ^ (x >> 4);
/* inverse affine map (constant 0x05); same missing-line caveats as
 * fwd_affine above (w declaration, #else/#endif, closing brace). */
276 uint_8t inv_affine(const uint_8t x)
278 #if defined( HAVE_UINT_32T )
280 w = (w << 1) ^ (w << 3) ^ (w << 6);
281 return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
283 return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6)
284 ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
/* table-free path: compute S-box values on the fly from gf_inv and the
 * affine maps, and field products directly from the f*-macros.  Much
 * slower than the USE_TABLES path but needs no 256-byte tables. */
288 #define s_box(x) fwd_affine(gf_inv(x))
289 #define is_box(x) gf_inv(inv_affine(x))
290 #define gfm2_sb(x) f2(s_box(x))
291 #define gfm3_sb(x) f3(s_box(x))
292 #define gfm_9(x) f9(x)
293 #define gfm_b(x) fb(x)
294 #define gfm_d(x) fd(x)
295 #define gfm_e(x) fe(x)
/* map the block-copy primitives onto memcpy when available, otherwise
 * onto the hand-rolled helpers below (the #else on original line 302 is
 * missing from this extract) */
299 #if defined( HAVE_MEMCPY )
300 # define block_copy_nn(d, s, l) memcpy(d, s, l)
301 # define block_copy(d, s) memcpy(d, s, N_BLOCK)
303 # define block_copy_nn(d, s, l) copy_block_nn(d, s, l)
304 # define block_copy(d, s) copy_block(d, s)
307 #if !defined( HAVE_MEMCPY )
/* copy one 16-byte block, fully unrolled.  NOTE(review): the uint_32t
 * path assumes d and s are 4-byte aligned and that uint_32t is really
 * 32 bits -- TODO confirm for the target platform.  The opening brace,
 * #else, #endif and closing brace are on lines missing here. */
308 static void copy_block( void *d, const void *s )
310 #if defined( HAVE_UINT_32T )
311 ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0];
312 ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1];
313 ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2];
314 ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3];
316 ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0];
317 ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1];
318 ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2];
319 ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3];
320 ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4];
321 ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5];
322 ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6];
323 ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7];
324 ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8];
325 ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9];
326 ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
327 ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
328 ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
329 ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
330 ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
331 ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
/* byte-wise copy of nn bytes; the loop header (original lines 336-337)
 * and the closing brace are missing from this extract */
335 static void copy_block_nn( void * d, const void *s, uint_8t nn )
/* NOTE(review): '*((uint_8t*)d)++' increments a cast expression, which
 * is not an lvalue in ISO C (C11 6.5.4) -- this compiles only as a
 * vendor extension; it should use local uint_8t* cursors instead. */
338 *((uint_8t*)d)++ = *((uint_8t*)s)++;
/* d ^= s over one 16-byte block, fully unrolled; same alignment caveat
 * and missing brace/#else/#endif lines as copy_block above */
342 static void xor_block( void *d, const void *s )
344 #if defined( HAVE_UINT_32T )
345 ((uint_32t*)d)[ 0] ^= ((uint_32t*)s)[ 0];
346 ((uint_32t*)d)[ 1] ^= ((uint_32t*)s)[ 1];
347 ((uint_32t*)d)[ 2] ^= ((uint_32t*)s)[ 2];
348 ((uint_32t*)d)[ 3] ^= ((uint_32t*)s)[ 3];
350 ((uint_8t*)d)[ 0] ^= ((uint_8t*)s)[ 0];
351 ((uint_8t*)d)[ 1] ^= ((uint_8t*)s)[ 1];
352 ((uint_8t*)d)[ 2] ^= ((uint_8t*)s)[ 2];
353 ((uint_8t*)d)[ 3] ^= ((uint_8t*)s)[ 3];
354 ((uint_8t*)d)[ 4] ^= ((uint_8t*)s)[ 4];
355 ((uint_8t*)d)[ 5] ^= ((uint_8t*)s)[ 5];
356 ((uint_8t*)d)[ 6] ^= ((uint_8t*)s)[ 6];
357 ((uint_8t*)d)[ 7] ^= ((uint_8t*)s)[ 7];
358 ((uint_8t*)d)[ 8] ^= ((uint_8t*)s)[ 8];
359 ((uint_8t*)d)[ 9] ^= ((uint_8t*)s)[ 9];
360 ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
361 ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
362 ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
363 ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
364 ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
365 ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
/* d = s ^ k over one 16-byte block (copy state while adding a round
 * key); same alignment caveat and missing brace/#else/#endif lines as
 * copy_block above */
369 static void copy_and_key( void *d, const void *s, const void *k )
371 #if defined( HAVE_UINT_32T )
372 ((uint_32t*)d)[ 0] = ((uint_32t*)s)[ 0] ^ ((uint_32t*)k)[ 0];
373 ((uint_32t*)d)[ 1] = ((uint_32t*)s)[ 1] ^ ((uint_32t*)k)[ 1];
374 ((uint_32t*)d)[ 2] = ((uint_32t*)s)[ 2] ^ ((uint_32t*)k)[ 2];
375 ((uint_32t*)d)[ 3] = ((uint_32t*)s)[ 3] ^ ((uint_32t*)k)[ 3];
377 ((uint_8t*)d)[ 0] = ((uint_8t*)s)[ 0] ^ ((uint_8t*)k)[ 0];
378 ((uint_8t*)d)[ 1] = ((uint_8t*)s)[ 1] ^ ((uint_8t*)k)[ 1];
379 ((uint_8t*)d)[ 2] = ((uint_8t*)s)[ 2] ^ ((uint_8t*)k)[ 2];
380 ((uint_8t*)d)[ 3] = ((uint_8t*)s)[ 3] ^ ((uint_8t*)k)[ 3];
381 ((uint_8t*)d)[ 4] = ((uint_8t*)s)[ 4] ^ ((uint_8t*)k)[ 4];
382 ((uint_8t*)d)[ 5] = ((uint_8t*)s)[ 5] ^ ((uint_8t*)k)[ 5];
383 ((uint_8t*)d)[ 6] = ((uint_8t*)s)[ 6] ^ ((uint_8t*)k)[ 6];
384 ((uint_8t*)d)[ 7] = ((uint_8t*)s)[ 7] ^ ((uint_8t*)k)[ 7];
385 ((uint_8t*)d)[ 8] = ((uint_8t*)s)[ 8] ^ ((uint_8t*)k)[ 8];
386 ((uint_8t*)d)[ 9] = ((uint_8t*)s)[ 9] ^ ((uint_8t*)k)[ 9];
387 ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
388 ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
389 ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
390 ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
391 ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
392 ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
/* AddRoundKey: XOR the round key k into state d in place (the body,
 * original lines 400-402 -- presumably a call to xor_block(d, k) -- is
 * missing from this extract; confirm against the full source) */
399 static void add_round_key( uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK] )
/* combined SubBytes + ShiftRows on the column-major state: row 0 is
 * unrotated, row 1 rotates left by 1, row 2 by 2, row 3 by 3 (done
 * right-to-left).  tt's declaration and the closing brace are on lines
 * missing from this extract. */
404 static void shift_sub_rows( uint_8t st[N_BLOCK] )
407 st[ 0] = s_box(st[ 0]); st[ 4] = s_box(st[ 4]);
408 st[ 8] = s_box(st[ 8]); st[12] = s_box(st[12]);
410 tt = st[1]; st[ 1] = s_box(st[ 5]); st[ 5] = s_box(st[ 9]);
411 st[ 9] = s_box(st[13]); st[13] = s_box( tt );
413 tt = st[2]; st[ 2] = s_box(st[10]); st[10] = s_box( tt );
414 tt = st[6]; st[ 6] = s_box(st[14]); st[14] = s_box( tt );
416 tt = st[15]; st[15] = s_box(st[11]); st[11] = s_box(st[ 7]);
417 st[ 7] = s_box(st[ 3]); st[ 3] = s_box( tt );
/* combined InvSubBytes + InvShiftRows: the exact inverse of
 * shift_sub_rows above (rows rotate right by 1/2/3).  tt's declaration
 * and the closing brace are on lines missing from this extract. */
420 static void inv_shift_sub_rows( uint_8t st[N_BLOCK] )
423 st[ 0] = is_box(st[ 0]); st[ 4] = is_box(st[ 4]);
424 st[ 8] = is_box(st[ 8]); st[12] = is_box(st[12]);
426 tt = st[13]; st[13] = is_box(st[9]); st[ 9] = is_box(st[5]);
427 st[ 5] = is_box(st[1]); st[ 1] = is_box( tt );
429 tt = st[2]; st[ 2] = is_box(st[10]); st[10] = is_box( tt );
430 tt = st[6]; st[ 6] = is_box(st[14]); st[14] = is_box( tt );
432 tt = st[3]; st[ 3] = is_box(st[ 7]); st[ 7] = is_box(st[11]);
433 st[11] = is_box(st[15]); st[15] = is_box( tt );
/* combined SubBytes + ShiftRows + MixColumns.  VERSION_1 works in place
 * (a copy of dt into the local st, the #else and braces are on lines
 * missing from this extract); the two-argument form reads st and writes
 * dt.  Indices like st[0], st[5], st[10], st[15] fold the ShiftRows
 * rotation into the column gather. */
436 #if defined( VERSION_1 )
437 static void mix_sub_columns( uint_8t dt[N_BLOCK] )
438 { uint_8t st[N_BLOCK];
441 static void mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
/* column 0: MixColumns row pattern (2,3,1,1) rotated per output byte */
444 dt[ 0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
445 dt[ 1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
446 dt[ 2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
447 dt[ 3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);
449 dt[ 4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
450 dt[ 5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
451 dt[ 6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
452 dt[ 7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);
454 dt[ 8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
455 dt[ 9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
456 dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
457 dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);
459 dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
460 dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
461 dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
462 dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
/* combined InvMixColumns + InvSubBytes + InvShiftRows.  Reads each
 * straight column of st, applies the inverse MixColumns row pattern
 * (0x0e,0x0b,0x0d,0x09), then is_box, scattering results to the
 * InvShiftRows positions.  Same VERSION_1 in-place arrangement and
 * missing brace/#else lines as mix_sub_columns above. */
465 #if defined( VERSION_1 )
466 static void inv_mix_sub_columns( uint_8t dt[N_BLOCK] )
467 { uint_8t st[N_BLOCK];
470 static void inv_mix_sub_columns( uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK] )
473 dt[ 0] = is_box(gfm_e(st[ 0]) ^ gfm_b(st[ 1]) ^ gfm_d(st[ 2]) ^ gfm_9(st[ 3]));
474 dt[ 5] = is_box(gfm_9(st[ 0]) ^ gfm_e(st[ 1]) ^ gfm_b(st[ 2]) ^ gfm_d(st[ 3]));
475 dt[10] = is_box(gfm_d(st[ 0]) ^ gfm_9(st[ 1]) ^ gfm_e(st[ 2]) ^ gfm_b(st[ 3]));
476 dt[15] = is_box(gfm_b(st[ 0]) ^ gfm_d(st[ 1]) ^ gfm_9(st[ 2]) ^ gfm_e(st[ 3]));
478 dt[ 4] = is_box(gfm_e(st[ 4]) ^ gfm_b(st[ 5]) ^ gfm_d(st[ 6]) ^ gfm_9(st[ 7]));
479 dt[ 9] = is_box(gfm_9(st[ 4]) ^ gfm_e(st[ 5]) ^ gfm_b(st[ 6]) ^ gfm_d(st[ 7]));
480 dt[14] = is_box(gfm_d(st[ 4]) ^ gfm_9(st[ 5]) ^ gfm_e(st[ 6]) ^ gfm_b(st[ 7]));
481 dt[ 3] = is_box(gfm_b(st[ 4]) ^ gfm_d(st[ 5]) ^ gfm_9(st[ 6]) ^ gfm_e(st[ 7]));
483 dt[ 8] = is_box(gfm_e(st[ 8]) ^ gfm_b(st[ 9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
484 dt[13] = is_box(gfm_9(st[ 8]) ^ gfm_e(st[ 9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
485 dt[ 2] = is_box(gfm_d(st[ 8]) ^ gfm_9(st[ 9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
486 dt[ 7] = is_box(gfm_b(st[ 8]) ^ gfm_d(st[ 9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));
488 dt[12] = is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
489 dt[ 1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
490 dt[ 6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
491 dt[11] = is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
494 #if defined( AES_ENC_PREKEYED ) || defined( AES_DEC_PREKEYED )
496 /* Set the cipher key for the pre-keyed version */
/* keylen is in bytes (16/24/32); the validating switch that also accepts
 * lengths given in bits, and the local declarations, are on lines
 * 499-517 missing from this extract. */
498 return_type aes_set_key( const unsigned char key[], length_type keylen, aes_context ctx[1] )
513 /* case 256: length in bits (256 = 8*32) */
/* invalid key length: signal failure to the caller */
518 return (return_type)-1;
520 block_copy_nn(ctx->ksch, key, keylen);
/* hi = key schedule size in bytes = 16 * (rounds + 1); rnd = round count */
521 hi = (keylen + 28) << 2;
522 ctx->rnd = (hi >> 4) - 1;
523 for( cc = keylen, rc = 1; cc < hi; cc += 4 )
524 { uint_8t tt, t0, t1, t2, t3;
/* previous schedule word */
526 t0 = ctx->ksch[cc - 4];
527 t1 = ctx->ksch[cc - 3];
528 t2 = ctx->ksch[cc - 2];
529 t3 = ctx->ksch[cc - 1];
/* start of a key-length period: RotWord+SubWord+Rcon (body on missing
 * lines 531-538); for 256-bit keys an extra SubWord at offset 16
 * (missing lines 540-545) */
530 if( cc % keylen == 0 )
539 else if( keylen > 24 && cc % keylen == 16 )
/* tt = cc - keylen (set on a missing line): XOR with the word one key
 * length back -- confirm against the full source */
547 ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
548 ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
549 ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
550 ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
557 #if defined( AES_ENC_PREKEYED )
559 /* Encrypt a single block of 16 bytes */
/* in/out may alias; ctx must have been set up by aes_set_key.  The
 * validity check on ctx->rnd and several braces are on lines missing
 * from this extract. */
561 return_type aes_encrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] )
565 uint_8t s1[N_BLOCK], r;
/* initial AddRoundKey with round key 0 */
566 copy_and_key( s1, in, ctx->ksch );
568 for( r = 1 ; r < ctx->rnd ; ++r )
569 #if defined( VERSION_1 )
570 mix_sub_columns( s1 );
571 mix_sub_columns( s1 );
572 add_round_key( s1, ctx->ksch + r * N_BLOCK);
575 { uint_8t s2[N_BLOCK];
576 mix_sub_columns( s2, s1 );
577 copy_and_key( s1, s2, ctx->ksch + r * N_BLOCK);
/* final round: no MixColumns, then last round key */
580 shift_sub_rows( s1 );
581 copy_and_key( out, s1, ctx->ksch + r * N_BLOCK );
/* reached when ctx->rnd is zero (uninitialised context) */
584 return (return_type)-1;
588 /* CBC encrypt a number of blocks (input and return an IV) */
/* the per-block loop header, the in/out pointer advances and the return
 * statements are on lines missing from this extract */
590 return_type aes_cbc_encrypt( const unsigned char *in, unsigned char *out,
591 int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
/* iv ^= plaintext happens on a missing line; then encrypt the chained
 * value in place and emit it as this block's ciphertext */
597 if(aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS)
599 memcpy(out, iv, N_BLOCK);
608 #if defined( AES_DEC_PREKEYED )
610 /* Decrypt a single block of 16 bytes */
/* equivalent-inverse-cipher order: last round key first, then the round
 * loop counts down.  The ctx->rnd validity check and several braces are
 * on lines missing from this extract. */
612 return_type aes_decrypt( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1] )
616 uint_8t s1[N_BLOCK], r;
617 copy_and_key( s1, in, ctx->ksch + ctx->rnd * N_BLOCK );
618 inv_shift_sub_rows( s1 );
620 for( r = ctx->rnd ; --r ; )
621 #if defined( VERSION_1 )
623 add_round_key( s1, ctx->ksch + r * N_BLOCK );
624 inv_mix_sub_columns( s1 );
627 { uint_8t s2[N_BLOCK];
628 copy_and_key( s2, s1, ctx->ksch + r * N_BLOCK );
629 inv_mix_sub_columns( s1, s2 );
/* final AddRoundKey with round key 0 */
632 copy_and_key( out, s1, ctx->ksch );
/* reached when ctx->rnd is zero (uninitialised context) */
635 return (return_type)-1;
639 /* CBC decrypt a number of blocks (input and return an IV) */
/* the per-block loop header, the out ^= iv step, the pointer advances
 * and the return statements are on lines missing from this extract */
641 return_type aes_cbc_decrypt( const unsigned char *in, unsigned char *out,
642 int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1] )
/* keep the ciphertext before decrypting so in == out aliasing works */
645 { uint_8t tmp[N_BLOCK];
647 memcpy(tmp, in, N_BLOCK);
648 if(aes_decrypt(in, out, ctx) != EXIT_SUCCESS)
/* saved ciphertext becomes the IV for the next block */
651 memcpy(iv, tmp, N_BLOCK);
660 #if defined( AES_ENC_128_OTFK )
662 /* The 'on the fly' encryption key update for 128 bit keys */
/* advance the 16-byte round key k to the next round in place; *rc is
 * the running Rcon byte (its f2() doubling is on a missing line).
 * cc's declaration and the braces are also missing from this extract. */
664 static void update_encrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
/* g-function on the last word: rotate, SubWord, XOR Rcon */
667 k[0] ^= s_box(k[13]) ^ *rc;
668 k[1] ^= s_box(k[14]);
669 k[2] ^= s_box(k[15]);
670 k[3] ^= s_box(k[12]);
/* ripple each word into the next */
673 for(cc = 4; cc < 16; cc += 4 )
675 k[cc + 0] ^= k[cc - 4];
676 k[cc + 1] ^= k[cc - 3];
677 k[cc + 2] ^= k[cc - 2];
678 k[cc + 3] ^= k[cc - 1];
682 /* Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
/* o_key is working storage: it starts as a copy of key and ends as the
 * last round key (usable as input to aes_decrypt_128).  Braces around
 * the loop bodies are on lines missing from this extract. */
684 void aes_encrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
685 const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
686 { uint_8t s1[N_BLOCK], r, rc = 1;
689 block_copy( o_key, key );
690 copy_and_key( s1, in, o_key );
/* 9 full rounds, deriving each round key just before it is used */
692 for( r = 1 ; r < 10 ; ++r )
693 #if defined( VERSION_1 )
695 mix_sub_columns( s1 );
696 update_encrypt_key_128( o_key, &rc );
697 add_round_key( s1, o_key );
700 { uint_8t s2[N_BLOCK];
701 mix_sub_columns( s2, s1 );
702 update_encrypt_key_128( o_key, &rc );
703 copy_and_key( s1, s2, o_key );
/* final round without MixColumns */
707 shift_sub_rows( s1 );
708 update_encrypt_key_128( o_key, &rc );
709 copy_and_key( out, s1, o_key );
714 #if defined( AES_DEC_128_OTFK )
716 /* The 'on the fly' decryption key update for 128 bit keys */
/* exact inverse of update_encrypt_key_128: undo the word ripple first
 * (top-down), then the g-function.  *rc is halved with d2() on a line
 * missing from this extract; cc's declaration and braces are missing
 * as well. */
718 static void update_decrypt_key_128( uint_8t k[N_BLOCK], uint_8t *rc )
721 for( cc = 12; cc > 0; cc -= 4 )
723 k[cc + 0] ^= k[cc - 4];
724 k[cc + 1] ^= k[cc - 3];
725 k[cc + 2] ^= k[cc - 2];
726 k[cc + 3] ^= k[cc - 1];
729 k[0] ^= s_box(k[13]) ^ *rc;
730 k[1] ^= s_box(k[14]);
731 k[2] ^= s_box(k[15]);
732 k[3] ^= s_box(k[12]);
735 /* Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
/* key must be the LAST round key (e.g. o_key left by aes_encrypt_128);
 * rc starts at 0x6c, the Rcon value one doubling past round 10.
 * Braces around the loop bodies are on lines missing from this extract. */
737 void aes_decrypt_128( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
738 const unsigned char key[N_BLOCK], unsigned char o_key[N_BLOCK] )
740 uint_8t s1[N_BLOCK], r, rc = 0x6c;
742 block_copy( o_key, key );
744 copy_and_key( s1, in, o_key );
745 inv_shift_sub_rows( s1 );
747 for( r = 10 ; --r ; )
748 #if defined( VERSION_1 )
750 update_decrypt_key_128( o_key, &rc );
751 add_round_key( s1, o_key );
752 inv_mix_sub_columns( s1 );
755 { uint_8t s2[N_BLOCK];
756 update_decrypt_key_128( o_key, &rc );
757 copy_and_key( s2, s1, o_key );
758 inv_mix_sub_columns( s1, s2 );
/* final step: o_key has been wound back to the original cipher key */
761 update_decrypt_key_128( o_key, &rc );
762 copy_and_key( out, s1, o_key );
767 #if defined( AES_ENC_256_OTFK )
769 /* The 'on the fly' encryption key update for 256 bit keys */
/* advance the 32-byte key block two schedule rounds in place; *rc is
 * doubled on a line missing from this extract, as are cc's declaration
 * and the braces */
771 static void update_encrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
/* g-function on the last word of the 32-byte block */
774 k[0] ^= s_box(k[29]) ^ *rc;
775 k[1] ^= s_box(k[30]);
776 k[2] ^= s_box(k[31]);
777 k[3] ^= s_box(k[28]);
/* ripple through the first half */
780 for(cc = 4; cc < 16; cc += 4)
782 k[cc + 0] ^= k[cc - 4];
783 k[cc + 1] ^= k[cc - 3];
784 k[cc + 2] ^= k[cc - 2];
785 k[cc + 3] ^= k[cc - 1];
/* 256-bit keys apply an extra SubWord (no rotate) at the half-way word */
788 k[16] ^= s_box(k[12]);
789 k[17] ^= s_box(k[13]);
790 k[18] ^= s_box(k[14]);
791 k[19] ^= s_box(k[15]);
/* ripple through the second half */
793 for( cc = 20; cc < 32; cc += 4 )
795 k[cc + 0] ^= k[cc - 4];
796 k[cc + 1] ^= k[cc - 3];
797 k[cc + 2] ^= k[cc - 2];
798 k[cc + 3] ^= k[cc - 1];
802 /* Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
/* o_key is 32 bytes of working key storage; round keys alternate between
 * its two halves, with update_encrypt_key_256 called every other round.
 * The (r & 1) selection tests and braces are on lines missing from this
 * extract. */
804 void aes_encrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
805 const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
807 uint_8t s1[N_BLOCK], r, rc = 1;
810 block_copy( o_key, key );
811 block_copy( o_key + 16, key + 16 );
813 copy_and_key( s1, in, o_key );
/* 13 full rounds for AES-256 */
815 for( r = 1 ; r < 14 ; ++r )
816 #if defined( VERSION_1 )
/* odd rounds use the upper half; even rounds refresh then use the lower */
820 add_round_key( s1, o_key + 16 );
823 update_encrypt_key_256( o_key, &rc );
824 add_round_key( s1, o_key );
828 { uint_8t s2[N_BLOCK];
829 mix_sub_columns( s2, s1 );
831 copy_and_key( s1, s2, o_key + 16 );
834 update_encrypt_key_256( o_key, &rc );
835 copy_and_key( s1, s2, o_key );
/* final round without MixColumns */
840 shift_sub_rows( s1 );
841 update_encrypt_key_256( o_key, &rc );
842 copy_and_key( out, s1, o_key );
847 #if defined( AES_DEC_256_OTFK )
849 /* The 'on the fly' decryption key update for 256 bit keys */
/* exact inverse of update_encrypt_key_256: undo both ripples (top-down)
 * and the half-way SubWord, then the g-function.  *rc is halved with
 * d2() on a line missing from this extract; cc's declaration and braces
 * are missing as well. */
851 static void update_decrypt_key_256( uint_8t k[2 * N_BLOCK], uint_8t *rc )
854 for(cc = 28; cc > 16; cc -= 4)
856 k[cc + 0] ^= k[cc - 4];
857 k[cc + 1] ^= k[cc - 3];
858 k[cc + 2] ^= k[cc - 2];
859 k[cc + 3] ^= k[cc - 1];
/* undo the extra SubWord applied at the half-way word */
862 k[16] ^= s_box(k[12]);
863 k[17] ^= s_box(k[13]);
864 k[18] ^= s_box(k[14]);
865 k[19] ^= s_box(k[15]);
867 for(cc = 12; cc > 0; cc -= 4)
869 k[cc + 0] ^= k[cc - 4];
870 k[cc + 1] ^= k[cc - 3];
871 k[cc + 2] ^= k[cc - 2];
872 k[cc + 3] ^= k[cc - 1];
/* undo the g-function on word 0 */
876 k[0] ^= s_box(k[29]) ^ *rc;
877 k[1] ^= s_box(k[30]);
878 k[2] ^= s_box(k[31]);
879 k[3] ^= s_box(k[28]);
882 /* Decrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
/* key must be the final 32-byte key block left by aes_encrypt_256;
 * rc starts at 0x80, the last Rcon used in the 256-bit schedule.
 * The (r & 1) half-selection tests and braces are on lines missing
 * from this extract. */
885 void aes_decrypt_256( const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK],
886 const unsigned char key[2 * N_BLOCK], unsigned char o_key[2 * N_BLOCK] )
888 uint_8t s1[N_BLOCK], r, rc = 0x80;
892 block_copy( o_key, key );
893 block_copy( o_key + 16, key + 16 );
896 copy_and_key( s1, in, o_key );
897 inv_shift_sub_rows( s1 );
899 for( r = 14 ; --r ; )
900 #if defined( VERSION_1 )
/* even rounds rewind the key block first, then use the upper half;
 * odd rounds use the lower half */
904 update_decrypt_key_256( o_key, &rc );
905 add_round_key( s1, o_key + 16 );
908 add_round_key( s1, o_key );
909 inv_mix_sub_columns( s1 );
912 { uint_8t s2[N_BLOCK];
915 update_decrypt_key_256( o_key, &rc );
916 copy_and_key( s2, s1, o_key + 16 );
919 copy_and_key( s2, s1, o_key );
920 inv_mix_sub_columns( s1, s2 );
/* final AddRoundKey: o_key has been wound back to the original key */
923 copy_and_key( out, s1, o_key );