]> git.sur5r.net Git - freertos/blob - FreeRTOS-Plus/Source/WolfSSL/ctaocrypt/src/aes.c
e25b5d87320f16ba2c93806046c54e5c2dac8dff
[freertos] / FreeRTOS-Plus / Source / WolfSSL / ctaocrypt / src / aes.c
1 /* aes.c
2  *
3  * Copyright (C) 2006-2014 wolfSSL Inc.
4  *
5  * This file is part of CyaSSL.
6  *
7  * CyaSSL is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * CyaSSL is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
20  */
21
22 #ifdef HAVE_CONFIG_H
23     #include <config.h>
24 #endif
25
26 #include <cyassl/ctaocrypt/settings.h>
27
28 #ifndef NO_AES
29
30 #ifdef HAVE_FIPS
31     /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */
32     #define FIPS_NO_WRAPPERS
33 #endif
34
35 #include <cyassl/ctaocrypt/aes.h>
36 #include <cyassl/ctaocrypt/error-crypt.h>
37 #include <cyassl/ctaocrypt/logging.h>
38 #ifdef NO_INLINE
39     #include <cyassl/ctaocrypt/misc.h>
40 #else
41     #include <ctaocrypt/src/misc.c>
42 #endif
43 #ifdef DEBUG_AESNI
44     #include <stdio.h>
45 #endif
46
47
48 #ifdef _MSC_VER
49     /* 4127 warning constant while(1)  */
50     #pragma warning(disable: 4127)
51 #endif
52
53
54
55 #ifdef HAVE_CAVIUM
56     static int  AesCaviumSetKey(Aes* aes, const byte* key, word32 length,
57                                 const byte* iv);
58     static int  AesCaviumCbcEncrypt(Aes* aes, byte* out, const byte* in,
59                                     word32 length);
60     static int  AesCaviumCbcDecrypt(Aes* aes, byte* out, const byte* in,
61                                     word32 length);
62 #endif
63
64 #if defined(CYASSL_PIC32MZ_CRYPT)
65
66 #include "cyassl/ctaocrypt/port/pic32/pic32mz-crypt.h"
67 #define DEBUG_CYASSL
68
69     /* core hardware crypt engine driver */
/*
 * Run one buffer through the PIC32MZ crypto engine and copy the result to out.
 *
 *   aes        - context supplying keylen, key_ce and iv_ce
 *   out        - destination buffer, sz bytes are written
 *   in         - source buffer, sz bytes are read
 *   sz         - number of bytes to process
 *   dir        - stored in SA_CTRL.ENCTYPE (encryption/decryption selector)
 *   algo       - stored in SA_CTRL.ALGO
 *   cryptoalgo - stored in SA_CTRL.CRYPTOALGO; also selects the GCM and CBC
 *                special cases handled below
 *
 * Nothing is returned, so engine failures are not reported to the caller.
 * For the CBC variants aes->iv_ce is updated for the next call.
 */
70     static void AesCrypt(Aes *aes, byte* out, const byte* in, word32 sz,
71                                             int dir, int algo, int cryptoalgo)
72     {
73         securityAssociation *sa_p ;
74         bufferDescriptor *bd_p ;
75
           /* descriptors live on the stack; they are accessed through the
              aliases produced by KVA0_TO_KVA1() */
76         volatile securityAssociation sa __attribute__((aligned (8)));
77         volatile bufferDescriptor bd __attribute__((aligned (8)));
           /* NOTE(review): k is never used in this function */
78         volatile int k ;
79
80         /* get uncached address */
81         sa_p = KVA0_TO_KVA1(&sa) ;
82         bd_p = KVA0_TO_KVA1(&bd) ;
83
84         /* Sync cache and physical memory */
85         if(PIC32MZ_IF_RAM(in)) {
86             XMEMCPY((void *)KVA0_TO_KVA1(in), (void *)in, sz);
87         }
88         XMEMSET((void *)KVA0_TO_KVA1(out), 0, sz);
89         /* Set up the Security Association */
90         XMEMSET((byte *)KVA0_TO_KVA1(&sa), 0, sizeof(sa));
91         sa_p->SA_CTRL.ALGO = algo ; /* AES */
92         sa_p->SA_CTRL.LNC = 1;
93         sa_p->SA_CTRL.LOADIV = 1;
94         sa_p->SA_CTRL.FB = 1;
95         sa_p->SA_CTRL.ENCTYPE = dir ; /* Encryption/Decryption */
96         sa_p->SA_CTRL.CRYPTOALGO = cryptoalgo;
97
           /* Only GCM derives KEYSIZE from aes->keylen.  The switch has no
              default, so any other keylen leaves KEYSIZE at the zero written
              by the XMEMSET above. */
98         if(cryptoalgo == PIC32_CRYPTOALGO_AES_GCM){
99             switch(aes->keylen) {
100             case 32:
101                 sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_256 ;
102                 break ;
103             case 24:
104                 sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_192 ;
105                 break ;
106             case 16:
107                 sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128 ;
108                 break ;
109             }
110         } else
               /* NOTE(review): every non-GCM mode is programmed as a 128-bit
                  key regardless of aes->keylen - confirm this is intended */
111             sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128 ;
112
           /* Byte-swap the key words into the tail of SA_ENCKEY.
              NOTE(review): the "+ 8" assumes SA_ENCKEY is an array of eight
              32-bit words - confirm against pic32mz-crypt.h */
113         ByteReverseWords(
114         (word32 *)KVA0_TO_KVA1(sa.SA_ENCKEY + 8 - aes->keylen/sizeof(word32)),
115                          (word32 *)aes->key_ce, aes->keylen);
           /* Byte-swap the 16-byte IV into the security association */
116         ByteReverseWords(
117         (word32*)KVA0_TO_KVA1(sa.SA_ENCIV), (word32 *)aes->iv_ce, 16);
118
119         XMEMSET((byte *)KVA0_TO_KVA1(&bd), 0, sizeof(bd));
120         /* Set up the Buffer Descriptor */
121         bd_p->BD_CTRL.BUFLEN = sz;
           /* GCM only: round BUFLEN up to a multiple of 16; MSGLEN below
              still carries the exact sz */
122         if(cryptoalgo == PIC32_CRYPTOALGO_AES_GCM) {
123             if(sz % 0x10)
124                 bd_p->BD_CTRL.BUFLEN = (sz/0x10 + 1) * 0x10 ;
125         }
126         bd_p->BD_CTRL.LIFM = 1;
127         bd_p->BD_CTRL.SA_FETCH_EN = 1;
128         bd_p->BD_CTRL.LAST_BD = 1;
129         bd_p->BD_CTRL.DESC_EN = 1;
130
           /* the descriptor fields are filled with KVA_TO_PA() translations */
131         bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa) ; 
132         bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in) ; 
133         bd_p->DSTADDR = (unsigned int)KVA_TO_PA(out); 
134         bd_p->MSGLEN = sz ;
135
           /* Write bit 6 of CECON and spin until the register reads zero.
              NOTE(review): presumably a soft reset of the engine - confirm
              against the PIC32MZ crypto engine documentation */
136         CECON = 1 << 6;
137         while (CECON);
138
139         /* Run the engine */
140         CEBDPADDR = (unsigned int)KVA_TO_PA(&bd) ;
141         CEINTEN = 0x07;
142         CECON = 0x27;
143
           /* WAIT_ENGINE is defined outside this file; presumably it polls
              for completion - TODO confirm */
144         WAIT_ENGINE ;
145
146         if((cryptoalgo == PIC32_CRYPTOALGO_CBC) ||
147            (cryptoalgo == PIC32_CRYPTOALGO_TCBC)||
148            (cryptoalgo == PIC32_CRYPTOALGO_RCBC)) {
149             /* set iv for the next call */
               /* Encryption copies the last output block as the engine left
                  it (before the byte swap at the end of this function);
                  decryption byte-swaps the last input block instead. */
150             if(dir == PIC32_ENCRYPTION) {
151                 XMEMCPY((void *)aes->iv_ce,
152                         (void*)KVA0_TO_KVA1(out + sz - AES_BLOCK_SIZE),
153                         AES_BLOCK_SIZE) ;
154             } else {
155                 ByteReverseWords((word32*)aes->iv_ce,
156                         (word32 *)KVA0_TO_KVA1(in + sz - AES_BLOCK_SIZE),
157                         AES_BLOCK_SIZE);
158             }
159         }
           /* fetch the result through the KVA0_TO_KVA1 alias, then byte-swap
              each 32-bit word of the output in place */
160         XMEMCPY((byte *)out, (byte *)KVA0_TO_KVA1(out), sz) ;
161         ByteReverseWords((word32*)out, (word32 *)out, sz);
162     }
163
164     int AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
165     {
166         AesCrypt(aes, out, in, sz, PIC32_ENCRYPTION, PIC32_ALGO_AES,
167                                                       PIC32_CRYPTOALGO_RCBC );
168         return 0 ;
169     }
170
171     int AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
172     {
173         AesCrypt(aes, out, in, sz, PIC32_DECRYPTION, PIC32_ALGO_AES,
174                                                       PIC32_CRYPTOALGO_RCBC);
175         return 0 ;
176     }
177
178     #if defined(CYASSL_AES_COUNTER)
179     void AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
180     {
181         int i ;
182         char out_block[AES_BLOCK_SIZE] ;
183         int odd ;
184         int even ;
185         char *tmp ; /* (char *)aes->tmp, for short */
186
187         tmp = (char *)aes->tmp ;
188         if(aes->left) {
189             if((aes->left + sz) >= AES_BLOCK_SIZE){
190                 odd = AES_BLOCK_SIZE - aes->left ;
191             } else {
192                 odd = sz ;
193             }
194             XMEMCPY(tmp+aes->left, in, odd) ;
195             if((odd+aes->left) == AES_BLOCK_SIZE){
196                 AesCrypt(aes, out_block, tmp, AES_BLOCK_SIZE,
197                     PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR);
198                 XMEMCPY(out, out_block+aes->left, odd) ;
199                 aes->left = 0 ;
200                 XMEMSET(tmp, 0x0, AES_BLOCK_SIZE) ;
201                 /* Increment IV */
202                 for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
203                     if (++((byte *)aes->iv_ce)[i])
204                         break ;
205                 }
206             }
207             in += odd ;
208             out+= odd ;
209             sz -= odd ;
210         }
211         odd = sz % AES_BLOCK_SIZE ;  /* if there is tail flagment */
212         if(sz / AES_BLOCK_SIZE) {
213             even = (sz/AES_BLOCK_SIZE)*AES_BLOCK_SIZE ;
214             AesCrypt(aes, out, in, even, PIC32_ENCRYPTION, PIC32_ALGO_AES,
215                                                     PIC32_CRYPTOALGO_RCTR);
216             out += even ;
217             in  += even ;
218             do {  /* Increment IV */
219                 for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
220                     if (++((byte *)aes->iv_ce)[i])
221                         break ;
222                 }
223                 even -= AES_BLOCK_SIZE ;
224             } while((int)even > 0) ;
225         }
226         if(odd) {
227             XMEMSET(tmp+aes->left, 0x0, AES_BLOCK_SIZE - aes->left) ;
228             XMEMCPY(tmp+aes->left, in, odd) ;
229             AesCrypt(aes, out_block, tmp, AES_BLOCK_SIZE,
230                     PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR);
231             XMEMCPY(out, out_block+aes->left,odd) ;
232             aes->left += odd ;
233         }
234     }
235     #endif /* CYASSL_AES_COUNTER */
236
237     #ifdef HAVE_AESGCM
238     #define HAVE_AES_ENGINE
239     /* Hardware AESGCM borrows most of the software AESGCM, GMAC */
240     #endif
241
242 #endif /* CYASSL_PIC32MZ_CRYPT */
243
244 #ifdef STM32F2_CRYPTO
245     /*
246      * STM32F2 hardware AES support through the STM32F2 standard peripheral
247      * library. Documentation located in STM32F2xx Standard Peripheral Library
248      * document (See note in README).
249      */
250     #include "stm32f2xx.h"
251     #include "stm32f2xx_cryp.h"
252
253     int AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
254                   int dir)
255     {
256         word32 *rk = aes->key;
257
258         if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
259             return BAD_FUNC_ARG;
260
261         aes->rounds = keylen/4 + 6;
262         XMEMCPY(rk, userKey, keylen);
263         ByteReverseWords(rk, rk, keylen);
264
265         return AesSetIV(aes, iv);
266     }
267
268     int AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
269     {
270         word32 *enc_key, *iv;
271         CRYP_InitTypeDef AES_CRYP_InitStructure;
272         CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
273         CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;
274
275         enc_key = aes->key;
276         iv = aes->reg;
277
278         /* crypto structure initialization */
279         CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
280         CRYP_StructInit(&AES_CRYP_InitStructure);
281         CRYP_IVStructInit(&AES_CRYP_IVInitStructure);
282
283         /* reset registers to their default values */
284         CRYP_DeInit();
285
286         /* load key into correct registers */
287         switch(aes->rounds)
288         {
289             case 10: /* 128-bit key */
290                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
291                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[0];
292                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
293                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[2];
294                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
295                 break;
296
297             case 12: /* 192-bit key */
298                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
299                 AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[0];
300                 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
301                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[2];
302                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
303                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[4];
304                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
305                 break;
306
307             case 14: /* 256-bit key */
308                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
309                 AES_CRYP_KeyInitStructure.CRYP_Key0Left  = enc_key[0];
310                 AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
311                 AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[2];
312                 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
313                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[4];
314                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
315                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[6];
316                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
317                 break;
318
319             default:
320                 break;
321         }
322         CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
323
324         /* set iv */
325         ByteReverseWords(iv, iv, AES_BLOCK_SIZE);
326         AES_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
327         AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
328         AES_CRYP_IVInitStructure.CRYP_IV1Left  = iv[2];
329         AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3];
330         CRYP_IVInit(&AES_CRYP_IVInitStructure);
331
332         /* set direction, mode, and datatype */
333         AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
334         AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
335         AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
336         CRYP_Init(&AES_CRYP_InitStructure);
337
338         /* enable crypto processor */
339         CRYP_Cmd(ENABLE);
340
341         while (sz > 0)
342         {
343             /* flush IN/OUT FIFOs */
344             CRYP_FIFOFlush();
345
346             CRYP_DataIn(*(uint32_t*)&in[0]);
347             CRYP_DataIn(*(uint32_t*)&in[4]);
348             CRYP_DataIn(*(uint32_t*)&in[8]);
349             CRYP_DataIn(*(uint32_t*)&in[12]);
350
351             /* wait until the complete message has been processed */
352             while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
353
354             *(uint32_t*)&out[0]  = CRYP_DataOut();
355             *(uint32_t*)&out[4]  = CRYP_DataOut();
356             *(uint32_t*)&out[8]  = CRYP_DataOut();
357             *(uint32_t*)&out[12] = CRYP_DataOut();
358
359             /* store iv for next call */
360             XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
361
362             sz  -= 16;
363             in  += 16;
364             out += 16;
365         }
366
367         /* disable crypto processor */
368         CRYP_Cmd(DISABLE);
369
370         return 0;
371     }
372
373     int AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
374     {
375         word32 *dec_key, *iv;
376         CRYP_InitTypeDef AES_CRYP_InitStructure;
377         CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
378         CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;
379
380         dec_key = aes->key;
381         iv = aes->reg;
382
383         /* crypto structure initialization */
384         CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
385         CRYP_StructInit(&AES_CRYP_InitStructure);
386         CRYP_IVStructInit(&AES_CRYP_IVInitStructure);
387
388         /* if input and output same will overwrite input iv */
389         XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
390
391         /* reset registers to their default values */
392         CRYP_DeInit();
393
394         /* load key into correct registers */
395         switch(aes->rounds)
396         {
397             case 10: /* 128-bit key */
398                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
399                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = dec_key[0];
400                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[1];
401                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = dec_key[2];
402                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[3];
403                 break;
404
405             case 12: /* 192-bit key */
406                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
407                 AES_CRYP_KeyInitStructure.CRYP_Key1Left  = dec_key[0];
408                 AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[1];
409                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = dec_key[2];
410                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[3];
411                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = dec_key[4];
412                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[5];
413                 break;
414
415             case 14: /* 256-bit key */
416                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
417                 AES_CRYP_KeyInitStructure.CRYP_Key0Left  = dec_key[0];
418                 AES_CRYP_KeyInitStructure.CRYP_Key0Right = dec_key[1];
419                 AES_CRYP_KeyInitStructure.CRYP_Key1Left  = dec_key[2];
420                 AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[3];
421                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = dec_key[4];
422                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[5];
423                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = dec_key[6];
424                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[7];
425                 break;
426
427             default:
428                 break;
429         }
430
431         /* set direction, mode, and datatype for key preparation */
432         AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
433         AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key;
434         AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_32b;
435         CRYP_Init(&AES_CRYP_InitStructure);
436         CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
437
438         /* enable crypto processor */
439         CRYP_Cmd(ENABLE);
440
441         /* wait until key has been prepared */
442         while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
443
444         /* set direction, mode, and datatype for decryption */
445         AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Decrypt;
446         AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC;
447         AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
448         CRYP_Init(&AES_CRYP_InitStructure);
449
450         /* set iv */
451         ByteReverseWords(iv, iv, AES_BLOCK_SIZE);
452
453         AES_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
454         AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
455         AES_CRYP_IVInitStructure.CRYP_IV1Left  = iv[2];
456         AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3];
457         CRYP_IVInit(&AES_CRYP_IVInitStructure);
458
459         /* enable crypto processor */
460         CRYP_Cmd(ENABLE);
461
462         while (sz > 0)
463         {
464             /* flush IN/OUT FIFOs */
465             CRYP_FIFOFlush();
466
467             CRYP_DataIn(*(uint32_t*)&in[0]);
468             CRYP_DataIn(*(uint32_t*)&in[4]);
469             CRYP_DataIn(*(uint32_t*)&in[8]);
470             CRYP_DataIn(*(uint32_t*)&in[12]);
471
472             /* wait until the complete message has been processed */
473             while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
474
475             *(uint32_t*)&out[0]  = CRYP_DataOut();
476             *(uint32_t*)&out[4]  = CRYP_DataOut();
477             *(uint32_t*)&out[8]  = CRYP_DataOut();
478             *(uint32_t*)&out[12] = CRYP_DataOut();
479
480             /* store iv for next call */
481             XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
482
483             sz -= 16;
484             in += 16;
485             out += 16;
486         }
487
488         /* disable crypto processor */
489         CRYP_Cmd(DISABLE);
490
491         return 0;
492     }
493
494     #ifdef CYASSL_AES_COUNTER
495
496     /* AES-CTR calls this for key setup */
497     int AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
498                         const byte* iv, int dir)
499     {
500         return AesSetKey(aes, userKey, keylen, iv, dir);
501     }
502
/*
 * AES-CTR using the STM32F2 CRYP peripheral.
 *
 *   aes - context; aes->key holds the key words, aes->rounds (10/12/14)
 *         selects the key size and aes->reg supplies the initial counter/IV
 *   out - destination buffer
 *   in  - source buffer
 *   sz  - byte count; the loop consumes exactly 16 bytes per iteration and
 *         sz is unsigned, so a value that is not a multiple of 16 wraps
 *         around instead of terminating the loop
 *
 * Unlike the CBC routines there is no return value.
 */
503     void AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
504     {
505         word32 *enc_key, *iv;
506         CRYP_InitTypeDef AES_CRYP_InitStructure;
507         CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure;
508         CRYP_IVInitTypeDef AES_CRYP_IVInitStructure;
509
510         enc_key = aes->key;
            /* iv aliases aes->reg, so the byte swap below modifies the
               context itself */
511         iv = aes->reg;
512
513         /* crypto structure initialization */
514         CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure);
515         CRYP_StructInit(&AES_CRYP_InitStructure);
516         CRYP_IVStructInit(&AES_CRYP_IVInitStructure);
517
518         /* reset registers to their default values */
519         CRYP_DeInit();
520
521         /* load key into correct registers */
            /* shorter keys occupy the higher-numbered key registers; an
               unexpected aes->rounds falls through to default and leaves
               the structures as the *StructInit calls set them */
522         switch(aes->rounds)
523         {
524             case 10: /* 128-bit key */
525                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b;
526                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[0];
527                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1];
528                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[2];
529                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3];
530                 break;
531
532             case 12: /* 192-bit key */
533                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b;
534                 AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[0];
535                 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1];
536                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[2];
537                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3];
538                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[4];
539                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5];
540                 break;
541
542             case 14: /* 256-bit key */
543                 AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b;
544                 AES_CRYP_KeyInitStructure.CRYP_Key0Left  = enc_key[0];
545                 AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1];
546                 AES_CRYP_KeyInitStructure.CRYP_Key1Left  = enc_key[2];
547                 AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3];
548                 AES_CRYP_KeyInitStructure.CRYP_Key2Left  = enc_key[4];
549                 AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5];
550                 AES_CRYP_KeyInitStructure.CRYP_Key3Left  = enc_key[6];
551                 AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7];
552                 break;
553
554             default:
555                 break;
556         }
557         CRYP_KeyInit(&AES_CRYP_KeyInitStructure);
558
559         /* set iv */
            /* NOTE(review): in-place swap; if sz is 0 the loop never
               rewrites aes->reg and it stays byte-reversed */
560         ByteReverseWords(iv, iv, AES_BLOCK_SIZE);
561         AES_CRYP_IVInitStructure.CRYP_IV0Left  = iv[0];
562         AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1];
563         AES_CRYP_IVInitStructure.CRYP_IV1Left  = iv[2];
564         AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3];
565         CRYP_IVInit(&AES_CRYP_IVInitStructure);
566
567         /* set direction, mode, and datatype */
568         AES_CRYP_InitStructure.CRYP_AlgoDir  = CRYP_AlgoDir_Encrypt;
569         AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR;
570         AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b;
571         CRYP_Init(&AES_CRYP_InitStructure);
572
573         /* enable crypto processor */
574         CRYP_Cmd(ENABLE);
575
576         while (sz > 0)
577         {
578             /* flush IN/OUT FIFOs */
579             CRYP_FIFOFlush();
580
                /* feed one 16-byte block as four 32-bit words */
581             CRYP_DataIn(*(uint32_t*)&in[0]);
582             CRYP_DataIn(*(uint32_t*)&in[4]);
583             CRYP_DataIn(*(uint32_t*)&in[8]);
584             CRYP_DataIn(*(uint32_t*)&in[12]);
585
586             /* wait until the complete message has been processed */
587             while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {}
588
589             *(uint32_t*)&out[0]  = CRYP_DataOut();
590             *(uint32_t*)&out[4]  = CRYP_DataOut();
591             *(uint32_t*)&out[8]  = CRYP_DataOut();
592             *(uint32_t*)&out[12] = CRYP_DataOut();
593
594             /* store iv for next call */
                /* "out + sz - AES_BLOCK_SIZE" is the last block of the
                   remaining output; only on the final iteration is that
                   the block just written.
                   NOTE(review): this saves an output block into aes->reg.
                   For counter mode one would expect the advanced counter
                   to be kept for the next call - confirm against the CRYP
                   documentation and the callers before relying on
                   multi-call streaming. */
595             XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
596
597             sz  -= 16;
598             in  += 16;
599             out += 16;
600         }
601
602         /* disable crypto processor */
603         CRYP_Cmd(DISABLE);
604     }
605
606     #endif /* CYASSL_AES_COUNTER */
607
608 #elif defined(HAVE_COLDFIRE_SEC)
609
610 #include <cyassl/ctaocrypt/types.h>
611
612 #include "sec.h"
613 #include "mcf5475_sec.h"
614 #include "mcf5475_siu.h"
615
616 #if defined (HAVE_THREADX)
617 #include "memory_pools.h"
618 extern TX_BYTE_POOL mp_ncached;  /* Non Cached memory pool */
619 #endif
620
/* Largest chunk handed to the SEC in one descriptor: 64 AES blocks. */
621 #define AES_BUFFER_SIZE (AES_BLOCK_SIZE * 64)
/* Staging buffers: AesCbcCrypt() copies each input chunk into AESBuffIn and
   reads the result back from AESBuffOut.  AesSetKey() allocates them from
   the non-cached pool when HAVE_THREADX is defined. */
622 static unsigned char *AESBuffIn = NULL ;
623 static unsigned char *AESBuffOut = NULL ;
/* Copy of the IV (aes->reg) that the descriptor's pointer2 refers to. */
624 static byte *secReg ; 
/* Copy of the key (aes->key) that the descriptor's pointer3 refers to. */
625 static byte *secKey ; 
/* The single SEC descriptor shared by all AES-CBC operations. */
626 static volatile SECdescriptorType *secDesc ;
627
/* Serialises AesCbcCrypt(), which uses the shared buffers above. */
628 static CyaSSL_Mutex Mutex_AesSEC ;
629   
/* Descriptor header words selecting AES-CBC encryption / decryption. */
630 #define SEC_DESC_AES_CBC_ENCRYPT 0x60300010
631 #define SEC_DESC_AES_CBC_DECRYPT 0x60200010
632
/* Declared here, defined elsewhere; not referenced directly in the visible
   code (presumably used by the MCF_SEC_* register macros - TODO confirm). */
633 extern volatile unsigned char __MBAR[];
634     
635 static int AesCbcCrypt(Aes* aes, byte* po, const byte* pi, word32 sz, word32 descHeader)
636 {
637     #ifdef DEBUG_CYASSL
638     int i ;  int stat1, stat2 ;   int ret ; 
639           #endif
640
641     int size ;
642     volatile int v ;
643
644     if((pi == NULL) || (po == NULL))
645         return BAD_FUNC_ARG;/*wrong pointer*/
646
647     LockMutex(&Mutex_AesSEC) ;
648
649     /* Set descriptor for SEC */            
650     secDesc->length1 = 0x0;
651     secDesc->pointer1 = NULL;
652         
653     secDesc->length2 = AES_BLOCK_SIZE;
654     secDesc->pointer2 = (byte *)secReg ; /* Initial Vector */
655     
656     switch(aes->rounds) {
657         case 10: secDesc->length3 = 16 ; break ;
658         case 12: secDesc->length3 = 24 ; break ;
659         case 14: secDesc->length3 = 32 ; break ;
660     } 
661     XMEMCPY(secKey, aes->key, secDesc->length3) ;
662
663     secDesc->pointer3 = (byte *)secKey;
664     secDesc->pointer4 = AESBuffIn ;
665     secDesc->pointer5 = AESBuffOut ;
666     secDesc->length6 = 0x0;
667     secDesc->pointer6 = NULL;
668     secDesc->length7 = 0x0;
669     secDesc->pointer7 = NULL;
670     secDesc->nextDescriptorPtr = NULL;
671   
672     while(sz) {
673         secDesc->header = descHeader ;
674         XMEMCPY(secReg, aes->reg, AES_BLOCK_SIZE) ;
675         if((sz%AES_BUFFER_SIZE) == sz) {
676             size = sz ;
677             sz = 0 ;
678         } else {
679             size = AES_BUFFER_SIZE ;
680             sz -= AES_BUFFER_SIZE ;
681         }
682         secDesc->length4 = size;
683         secDesc->length5 = size;
684         
685         XMEMCPY(AESBuffIn, pi, size) ;
686         if(descHeader == SEC_DESC_AES_CBC_DECRYPT) {
687             XMEMCPY((void*)aes->tmp, (void*)&(pi[size-AES_BLOCK_SIZE]), AES_BLOCK_SIZE) ;
688         }
689
690         /* Point SEC to the location of the descriptor */
691         MCF_SEC_FR0 = (uint32)secDesc;
692         /* Initialize SEC and wait for encryption to complete */
693         MCF_SEC_CCCR0 = 0x0000001a;
694         /* poll SISR to determine when channel is complete */
695         v=0 ;
696         while((secDesc->header>> 24) != 0xff)v++ ;
697
698 #ifdef DEBUG_CYASSL
699         ret = MCF_SEC_SISRH;
700         stat1 = MCF_SEC_AESSR ; 
701         stat2 = MCF_SEC_AESISR ; 
702         if(ret & 0xe0000000)
703         {
704             db_printf("Aes_Cbc(i=%d):ISRH=%08x, AESSR=%08x, AESISR=%08x\n", i, ret, stat1, stat2) ; 
705         }
706 #endif
707
708         XMEMCPY(po, AESBuffOut, size) ;
709
710         if(descHeader == SEC_DESC_AES_CBC_ENCRYPT) {
711             XMEMCPY((void*)aes->reg, (void*)&(po[size-AES_BLOCK_SIZE]), AES_BLOCK_SIZE) ;
712         } else {
713             XMEMCPY((void*)aes->reg, (void*)aes->tmp, AES_BLOCK_SIZE) ;
714         }
715
716         pi += size ; 
717         po += size ;
718     }
719     UnLockMutex(&Mutex_AesSEC) ;
720     return 0 ; 
721 }
722
723 int AesCbcEncrypt(Aes* aes, byte* po, const byte* pi, word32 sz)
724 {
725     return(AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_ENCRYPT)) ;
726 }
727
728 int AesCbcDecrypt(Aes* aes, byte* po, const byte* pi, word32 sz)
729 {
730     return(AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_DECRYPT)) ;
731 }
732
733 int AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
734                   int dir)
735 {
736     
737     if(AESBuffIn == NULL) {
738         #if defined (HAVE_THREADX)
739                           int s1, s2, s3, s4, s5 ;
740         s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc, sizeof(SECdescriptorType), TX_NO_WAIT);
741         s1 = tx_byte_allocate(&mp_ncached,(void *)&AESBuffIn, AES_BUFFER_SIZE, TX_NO_WAIT);
742         s2 = tx_byte_allocate(&mp_ncached,(void *)&AESBuffOut, AES_BUFFER_SIZE, TX_NO_WAIT);
743         s3 = tx_byte_allocate(&mp_ncached,(void *)&secKey, AES_BLOCK_SIZE*2,TX_NO_WAIT);
744         s4 = tx_byte_allocate(&mp_ncached,(void *)&secReg, AES_BLOCK_SIZE,  TX_NO_WAIT);
745         
746         if(s1 || s2 || s3 || s4 || s5)
747          return BAD_FUNC_ARG;
748         
749         #else
750         #warning "Allocate non-Cache buffers"
751         #endif
752         
753         InitMutex(&Mutex_AesSEC) ;
754     }
755
756     if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
757         return BAD_FUNC_ARG;
758     if (aes == NULL)
759         return BAD_FUNC_ARG;    
760     
761     aes->rounds = keylen/4 + 6;
762
763     XMEMCPY(aes->key, userKey, keylen);         
764     if (iv)
765         XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
766     
767     return 0;
768 }
769
770 #elif defined FREESCALE_MMCAU
771     /*
772      * Freescale mmCAU hardware AES support through the CAU/mmCAU library.
773      * Documentation located in ColdFire/ColdFire+ CAU and Kinetis mmCAU
774      * Software Library User Guide (See note in README).
775      */
776     #include "cau_api.h"
777
778     int AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
779                   int dir)
780     {
781         byte *rk = (byte*)aes->key;
782
783         if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
784             return BAD_FUNC_ARG;
785
786         if (rk == NULL)
787             return BAD_FUNC_ARG;
788
789         aes->rounds = keylen/4 + 6;
790         cau_aes_set_key(userKey, keylen*8, rk);
791
792         return AesSetIV(aes, iv);
793     }
794
795     int AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
796     {
797         int i;
798         int offset = 0;
799         int len = sz;
800
801         byte *iv, *enc_key;
802         byte temp_block[AES_BLOCK_SIZE];
803
804         iv      = (byte*)aes->reg;
805         enc_key = (byte*)aes->key;
806
807         if ((word)out % CYASSL_MMCAU_ALIGNMENT) {
808             CYASSL_MSG("Bad cau_aes_encrypt alignment"); 
809             return BAD_ALIGN_E;
810         }
811
812         while (len > 0)
813         {
814             XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE);
815
816             /* XOR block with IV for CBC */
817             for (i = 0; i < AES_BLOCK_SIZE; i++)
818                 temp_block[i] ^= iv[i];
819
820             cau_aes_encrypt(temp_block, enc_key, aes->rounds, out + offset);
821
822             len    -= AES_BLOCK_SIZE;
823             offset += AES_BLOCK_SIZE;
824
825             /* store IV for next block */
826             XMEMCPY(iv, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
827         }
828
829         return 0;
830     }
831
832     /*
833      * CBC-decrypt whole AES blocks from in to out using the Freescale
834      * CAU/mmCAU library.
835      *
836      * aes  context holding the key schedule (aes->key), the round count and
837      *      the running IV (aes->reg); the IV is updated after every block
838      * out  plaintext destination; must satisfy CYASSL_MMCAU_ALIGNMENT
839      * in   ciphertext source
840      * sz   number of bytes to process; only complete AES_BLOCK_SIZE blocks
841      *      are decrypted, a trailing partial block is left untouched
842      *
843      * Returns 0 on success, BAD_FUNC_ARG if a pointer is NULL, or
844      * BAD_ALIGN_E if out is not suitably aligned.
845      */
846     int AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
847     {
848         int    i;
849         word32 offset = 0;
850         word32 blocks = sz / AES_BLOCK_SIZE;   /* whole blocks only */
851         byte*  iv;
852         byte*  dec_key;
853         byte   temp_block[AES_BLOCK_SIZE];
854
855         if (aes == NULL || out == NULL || in == NULL)
856             return BAD_FUNC_ARG;
857
858         if ((word)out % CYASSL_MMCAU_ALIGNMENT) {
859             CYASSL_MSG("Bad cau_aes_decrypt alignment");
860             return BAD_ALIGN_E;
861         }
862
863         iv      = (byte*)aes->reg;
864         dec_key = (byte*)aes->key;
865
866         /*
867          * Counting blocks with an unsigned type (instead of a signed byte
868          * count) keeps a sz that is not a multiple of AES_BLOCK_SIZE from
869          * reading and writing a full block past the end of the buffers.
870          */
871         while (blocks-- > 0) {
872             /* keep a copy of the ciphertext block: it is the next IV and
873              * would be lost if out overlaps in */
874             XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE);
875
876             cau_aes_decrypt(in + offset, dec_key, aes->rounds, out + offset);
877
878             /* XOR block with IV for CBC */
879             for (i = 0; i < AES_BLOCK_SIZE; i++)
880                 out[offset + i] ^= iv[i];
881
882             /* store IV for next block */
883             XMEMCPY(iv, temp_block, AES_BLOCK_SIZE);
884
885             offset += AES_BLOCK_SIZE;
886         }
887
888         return 0;
889     }
868
869
870 #else /* CTaoCrypt software implementation */
871
872 /* Rijndael round constants (rcon); each value sits in the top byte of its word */
873 static const word32 rcon[] = {
874     0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000,
875     0x20000000, 0x40000000, 0x80000000, 0x1B000000, 0x36000000
876     /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
877 };
878
879 /* Te: five 256-entry tables of 32-bit words; presumably the encryption-side lookup tables of the software AES (usage is outside this chunk - confirm) */
880 static const word32 Te[5][256] = {
881 { /* Te[0]: base table; Te[1]..Te[3] below hold the same entries byte-rotated */
882     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
883     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
884     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
885     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
886     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
887     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
888     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
889     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
890     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
891     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
892     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
893     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
894     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
895     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
896     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
897     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
898     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
899     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
900     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
901     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
902     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
903     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
904     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
905     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
906     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
907     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
908     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
909     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
910     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
911     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
912     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
913     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
914     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
915     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
916     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
917     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
918     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
919     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
920     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
921     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
922     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
923     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
924     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
925     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
926     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
927     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
928     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
929     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
930     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
931     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
932     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
933     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
934     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
935     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
936     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
937     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
938     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
939     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
940     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
941     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
942     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
943     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
944     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
945     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
946 },
947 { /* Te[1]: each entry is the matching Te[0] entry rotated right by 8 bits */
948     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
949     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
950     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
951     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
952     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
953     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
954     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
955     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
956     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
957     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
958     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
959     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
960     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
961     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
962     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
963     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
964     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
965     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
966     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
967     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
968     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
969     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
970     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
971     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
972     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
973     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
974     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
975     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
976     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
977     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
978     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
979     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
980     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
981     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
982     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
983     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
984     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
985     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
986     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
987     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
988     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
989     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
990     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
991     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
992     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
993     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
994     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
995     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
996     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
997     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
998     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
999     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
1000     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
1001     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
1002     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
1003     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
1004     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
1005     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
1006     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
1007     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
1008     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
1009     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
1010     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
1011     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
1012 },
1013 { /* Te[2]: each entry is the matching Te[0] entry rotated right by 16 bits */
1014     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
1015     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
1016     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
1017     0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
1018     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
1019     0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
1020     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
1021     0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
1022     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
1023     0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
1024     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
1025     0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
1026     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
1027     0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
1028     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
1029     0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
1030     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
1031     0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
1032     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
1033     0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
1034     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
1035     0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
1036     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
1037     0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
1038     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
1039     0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
1040     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
1041     0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
1042     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
1043     0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
1044     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
1045     0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
1046     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
1047     0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
1048     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
1049     0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
1050     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
1051     0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
1052     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
1053     0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
1054     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
1055     0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
1056     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
1057     0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
1058     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
1059     0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
1060     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
1061     0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
1062     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
1063     0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
1064     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
1065     0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
1066     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
1067     0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
1068     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
1069     0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
1070     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
1071     0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
1072     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
1073     0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
1074     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
1075     0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
1076     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
1077     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
1078 },
1079 { /* Te[3]: each entry is the matching Te[0] entry rotated right by 24 bits */
1080     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
1081     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
1082     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
1083     0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
1084     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
1085     0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
1086     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
1087     0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
1088     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
1089     0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
1090     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
1091     0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
1092     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
1093     0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
1094     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
1095     0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
1096     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
1097     0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
1098     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
1099     0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
1100     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
1101     0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
1102     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
1103     0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
1104     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
1105     0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
1106     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
1107     0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
1108     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
1109     0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
1110     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
1111     0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
1112     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
1113     0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
1114     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
1115     0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
1116     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
1117     0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
1118     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
1119     0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
1120     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
1121     0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
1122     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
1123     0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
1124     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
1125     0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
1126     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
1127     0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
1128     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
1129     0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
1130     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
1131     0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
1132     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
1133     0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
1134     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
1135     0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
1136     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
1137     0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
1138     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
1139     0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
1140     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
1141     0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
1142     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
1143     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
1144 },
1145 { /* Te[4]: each entry repeats one byte (the doubled byte of the Te[0] entry) in all four byte positions */
1146     0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
1147     0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
1148     0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
1149     0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
1150     0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
1151     0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
1152     0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
1153     0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
1154     0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
1155     0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
1156     0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
1157     0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
1158     0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
1159     0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
1160     0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
1161     0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
1162     0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
1163     0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
1164     0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
1165     0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
1166     0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
1167     0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
1168     0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
1169     0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
1170     0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
1171     0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
1172     0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
1173     0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
1174     0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
1175     0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
1176     0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
1177     0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
1178     0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
1179     0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
1180     0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
1181     0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
1182     0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
1183     0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
1184     0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
1185     0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
1186     0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
1187     0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
1188     0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
1189     0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
1190     0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
1191     0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
1192     0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
1193     0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
1194     0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
1195     0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
1196     0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
1197     0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
1198     0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
1199     0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
1200     0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
1201     0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
1202     0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
1203     0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
1204     0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
1205     0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
1206     0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
1207     0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
1208     0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
1209     0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
1210 }
1211 };
1212
1213
1214 static const word32 Td[5][256] = {
1215 {
1216     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
1217     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
1218     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
1219     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
1220     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
1221     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
1222     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
1223     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
1224     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
1225     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
1226     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
1227     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
1228     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
1229     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
1230     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
1231     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
1232     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
1233     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
1234     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
1235     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
1236     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
1237     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
1238     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
1239     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
1240     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
1241     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
1242     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
1243     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
1244     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
1245     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
1246     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
1247     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
1248     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
1249     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
1250     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
1251     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
1252     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
1253     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
1254     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
1255     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
1256     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
1257     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
1258     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
1259     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
1260     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
1261     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
1262     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
1263     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
1264     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
1265     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
1266     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
1267     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
1268     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
1269     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
1270     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
1271     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
1272     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
1273     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
1274     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
1275     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
1276     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
1277     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
1278     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
1279     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
1280 },
1281 {
1282     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
1283     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
1284     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
1285     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
1286     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
1287     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
1288     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
1289     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
1290     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
1291     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
1292     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
1293     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
1294     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
1295     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
1296     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
1297     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
1298     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
1299     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
1300     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
1301     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
1302     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
1303     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
1304     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
1305     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
1306     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
1307     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
1308     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
1309     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
1310     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
1311     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
1312     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
1313     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
1314     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
1315     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
1316     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
1317     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
1318     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
1319     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
1320     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
1321     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
1322     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
1323     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
1324     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
1325     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
1326     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
1327     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
1328     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
1329     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
1330     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
1331     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
1332     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
1333     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
1334     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
1335     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
1336     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
1337     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
1338     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
1339     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
1340     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
1341     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
1342     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
1343     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
1344     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
1345     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
1346 },
1347 {
1348     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
1349     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
1350     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
1351     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
1352     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
1353     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
1354     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
1355     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
1356     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
1357     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
1358     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
1359     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
1360     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
1361     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
1362     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
1363     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
1364     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
1365     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
1366     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
1367     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
1368
1369     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
1370     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
1371     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
1372     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
1373     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
1374     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
1375     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
1376     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
1377     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
1378     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
1379     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
1380     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
1381     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
1382     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
1383     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
1384     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
1385     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
1386     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
1387     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
1388     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
1389     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
1390     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
1391     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
1392     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
1393     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
1394     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
1395     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
1396     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
1397     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
1398     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
1399     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
1400     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
1401     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
1402     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
1403     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
1404     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
1405     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
1406     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
1407     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
1408     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
1409     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
1410     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
1411     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
1412     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
1413 },
1414 {
1415     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
1416     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
1417     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
1418     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
1419     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
1420     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
1421     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
1422     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
1423     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
1424     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
1425     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
1426     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
1427     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
1428     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
1429     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
1430     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
1431     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
1432     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
1433     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
1434     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
1435     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
1436     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
1437     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
1438     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
1439     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
1440     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
1441     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
1442     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
1443     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
1444     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
1445     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
1446     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
1447     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
1448     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
1449     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
1450     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
1451     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
1452     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
1453     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
1454     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
1455     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
1456     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
1457     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
1458     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
1459     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
1460     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
1461     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
1462     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
1463     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
1464     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
1465     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
1466     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
1467     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
1468     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
1469     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
1470     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
1471     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
1472     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
1473     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
1474     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
1475     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
1476     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
1477     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
1478     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
1479 },
1480 {
1481     0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
1482     0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
1483     0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
1484     0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
1485     0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
1486     0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
1487     0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
1488     0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
1489     0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
1490     0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
1491     0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
1492     0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
1493     0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
1494     0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
1495     0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
1496     0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
1497     0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
1498     0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
1499     0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
1500     0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
1501     0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
1502     0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
1503     0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
1504     0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
1505     0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
1506     0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
1507     0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
1508     0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
1509     0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
1510     0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
1511     0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
1512     0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
1513     0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
1514     0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
1515     0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
1516     0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
1517     0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
1518     0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
1519     0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
1520     0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
1521     0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
1522     0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
1523     0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
1524     0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
1525     0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
1526     0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
1527     0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
1528     0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
1529     0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
1530     0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
1531     0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
1532     0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
1533     0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
1534     0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
1535     0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
1536     0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
1537     0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
1538     0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
1539     0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
1540     0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
1541     0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
1542     0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
1543     0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
1544     0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
1545 }
1546 };
1547
1548
/* Extract byte number y of the word x (y == 0 is the least significant byte)
 * and widen it to word32 so it can be used directly as a table index.
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression wherever it is used (e.g. after sizeof). */
#define GETBYTE(x, y) ((word32)((byte)((x) >> (8 * (y)))))
1550
1551
1552 #ifdef CYASSL_AESNI
1553
1554 /* Each platform needs to query info type 1 from cpuid to see if aesni is
1555  * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts
1556  */
1557
#ifndef _MSC_VER

    /* Execute CPUID for leaf `func`; eax, ebx, ecx and edx are stored in
     * reg[0..3].  The expansion keeps its trailing ';' so existing call
     * sites written as `cpuid(reg, n);` continue to work unchanged. */
    #define cpuid(reg, func)\
        __asm__ __volatile__ ("cpuid":\
             "=a" ((reg)[0]), "=b" ((reg)[1]), "=c" ((reg)[2]), "=d" ((reg)[3]) :\
             "a" (func));

    /* Pin the assembler-level symbol name of a declaration.  __asm__ is
     * used instead of asm because plain asm is not a keyword when compiling
     * in a strict ISO mode (-std=c99, -std=c11, -ansi). */
    #define XASM_LINK(f) __asm__(f)
#else

    #include <intrin.h>
    /* MSVC intrinsic: a receives the four registers, b is the leaf number */
    #define cpuid(a,b) __cpuid((int*)(a),(b))

    /* no symbol renaming on MSVC */
    #define XASM_LINK(f)

#endif /* _MSC_VER */
1574
1575             
/* Ask the processor (CPUID leaf 1) whether the AES instruction set is
 * available.  Returns 1 when the feature bit is set, 0 otherwise. */
static int Check_CPU_support_AES(void)
{
    unsigned int regs[4];  /* eax, ebx, ecx, edx end up in slots 0..3 */

    cpuid(regs, 1);        /* query info 1 */

    /* the AES flag is reported in ecx, mask 0x2000000 (bit 25) */
    return (regs[2] & 0x2000000) ? 1 : 0;
}

/* checkAESNI: becomes nonzero once the CPUID probe has been performed.
 * haveAESNI:  cached result of that probe (nonzero when AES-NI is usable). */
static int checkAESNI = 0;
static int haveAESNI  = 0;
1589
1590
1591 /* tell C compiler these are asm functions in case any mix up of ABI underscore
1592    prefix between clang/gcc/llvm etc */
1593 void AES_CBC_encrypt(const unsigned char* in, unsigned char* out,
1594                      unsigned char* ivec, unsigned long length,
1595                      const unsigned char* KS, int nr)
1596                      XASM_LINK("AES_CBC_encrypt");
1597
1598
1599 void AES_CBC_decrypt(const unsigned char* in, unsigned char* out,
1600                      unsigned char* ivec, unsigned long length,
1601                      const unsigned char* KS, int nr)
1602                      XASM_LINK("AES_CBC_decrypt");
1603
1604 void AES_ECB_encrypt(const unsigned char* in, unsigned char* out,
1605                      unsigned long length, const unsigned char* KS, int nr)
1606                      XASM_LINK("AES_ECB_encrypt");
1607
1608
1609 void AES_ECB_decrypt(const unsigned char* in, unsigned char* out,
1610                      unsigned long length, const unsigned char* KS, int nr)
1611                      XASM_LINK("AES_ECB_decrypt");
1612
1613 void AES_128_Key_Expansion(const unsigned char* userkey, 
1614                            unsigned char* key_schedule)
1615                            XASM_LINK("AES_128_Key_Expansion");
1616
1617 void AES_192_Key_Expansion(const unsigned char* userkey, 
1618                            unsigned char* key_schedule)
1619                            XASM_LINK("AES_192_Key_Expansion");
1620
1621 void AES_256_Key_Expansion(const unsigned char* userkey, 
1622                            unsigned char* key_schedule)
1623                            XASM_LINK("AES_256_Key_Expansion");
1624
1625
1626 static int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
1627                                Aes* aes)
1628
1629     if (!userKey || !aes)
1630         return BAD_FUNC_ARG;
1631     
1632     if (bits == 128) {
1633        AES_128_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 10;
1634        return 0;
1635     }
1636     else if (bits == 192) {
1637        AES_192_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 12;
1638        return 0;
1639     }
1640     else if (bits == 256) {
1641        AES_256_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 14;
1642        return 0;
1643     }
1644     return BAD_FUNC_ARG;
1645 }
1646
1647
1648 static int AES_set_decrypt_key(const unsigned char* userKey, const int bits,
1649                                Aes* aes)
1650 {
1651     int nr;
1652     Aes temp_key;
1653     __m128i *Key_Schedule = (__m128i*)aes->key;
1654     __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key;
1655     
1656     if (!userKey || !aes)
1657         return BAD_FUNC_ARG;
1658
1659     if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG)
1660         return BAD_FUNC_ARG;
1661
1662     nr = temp_key.rounds;
1663     aes->rounds = nr;
1664
1665     Key_Schedule[nr] = Temp_Key_Schedule[0];
1666     Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]);
1667     Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]);
1668     Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]);
1669     Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]);
1670     Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]);
1671     Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]);
1672     Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]);
1673     Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]);
1674     Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]);
1675     
1676     if(nr>10) {
1677         Key_Schedule[nr-10] = _mm_aesimc_si128(Temp_Key_Schedule[10]);
1678         Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]);
1679     }
1680
1681     if(nr>12) {
1682         Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]);
1683         Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]);
1684     }
1685
1686     Key_Schedule[0] = Temp_Key_Schedule[nr];
1687     
1688     return 0;
1689 }
1690
1691
1692
1693 #endif /* CYASSL_AESNI */
1694
1695
1696 static int AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen,
1697             const byte* iv, int dir)
1698 {
1699     word32 temp, *rk = aes->key;
1700     unsigned int i = 0;
1701
1702     #ifdef CYASSL_AESNI
1703         aes->use_aesni = 0;
1704     #endif /* CYASSL_AESNI */
1705     #ifdef CYASSL_AES_COUNTER
1706         aes->left = 0;
1707     #endif /* CYASSL_AES_COUNTER */
1708
1709     aes->rounds = keylen/4 + 6;
1710
1711     XMEMCPY(rk, userKey, keylen);
1712     #ifdef LITTLE_ENDIAN_ORDER
1713         ByteReverseWords(rk, rk, keylen);
1714     #endif
1715
1716 #ifdef CYASSL_PIC32MZ_CRYPT
1717     {
1718         word32 *akey1 = aes->key_ce;
1719         word32 *areg = aes->iv_ce ;
1720         aes->keylen = keylen ;
1721         XMEMCPY(akey1, userKey, keylen);
1722         if (iv)
1723             XMEMCPY(areg, iv, AES_BLOCK_SIZE);
1724         else
1725             XMEMSET(areg,  0, AES_BLOCK_SIZE);
1726     }
1727 #endif
1728
1729     switch(keylen)
1730     {
1731     case 16:
1732         while (1)
1733         {
1734             temp  = rk[3];
1735             rk[4] = rk[0] ^
1736                 (Te[4][GETBYTE(temp, 2)] & 0xff000000) ^
1737                 (Te[4][GETBYTE(temp, 1)] & 0x00ff0000) ^
1738                 (Te[4][GETBYTE(temp, 0)] & 0x0000ff00) ^
1739                 (Te[4][GETBYTE(temp, 3)] & 0x000000ff) ^
1740                 rcon[i];
1741             rk[5] = rk[1] ^ rk[4];
1742             rk[6] = rk[2] ^ rk[5];
1743             rk[7] = rk[3] ^ rk[6];
1744             if (++i == 10)
1745                 break;
1746             rk += 4;
1747         }
1748         break;
1749
1750     case 24:
1751         while (1)  /* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */
1752         {
1753             temp = rk[ 5];
1754             rk[ 6] = rk[ 0] ^
1755                 (Te[4][GETBYTE(temp, 2)] & 0xff000000) ^
1756                 (Te[4][GETBYTE(temp, 1)] & 0x00ff0000) ^
1757                 (Te[4][GETBYTE(temp, 0)] & 0x0000ff00) ^
1758                 (Te[4][GETBYTE(temp, 3)] & 0x000000ff) ^
1759                 rcon[i];
1760             rk[ 7] = rk[ 1] ^ rk[ 6];
1761             rk[ 8] = rk[ 2] ^ rk[ 7];
1762             rk[ 9] = rk[ 3] ^ rk[ 8];
1763             if (++i == 8)
1764                 break;
1765             rk[10] = rk[ 4] ^ rk[ 9];
1766             rk[11] = rk[ 5] ^ rk[10];
1767             rk += 6;
1768         }
1769         break;
1770
1771     case 32:
1772         while (1)
1773         {
1774             temp = rk[ 7];
1775             rk[ 8] = rk[ 0] ^
1776                 (Te[4][GETBYTE(temp, 2)] & 0xff000000) ^
1777                 (Te[4][GETBYTE(temp, 1)] & 0x00ff0000) ^
1778                 (Te[4][GETBYTE(temp, 0)] & 0x0000ff00) ^
1779                 (Te[4][GETBYTE(temp, 3)] & 0x000000ff) ^
1780                 rcon[i];
1781             rk[ 9] = rk[ 1] ^ rk[ 8];
1782             rk[10] = rk[ 2] ^ rk[ 9];
1783             rk[11] = rk[ 3] ^ rk[10];
1784             if (++i == 7)
1785                 break;
1786             temp = rk[11];
1787             rk[12] = rk[ 4] ^
1788                 (Te[4][GETBYTE(temp, 3)] & 0xff000000) ^
1789                 (Te[4][GETBYTE(temp, 2)] & 0x00ff0000) ^
1790                 (Te[4][GETBYTE(temp, 1)] & 0x0000ff00) ^
1791                 (Te[4][GETBYTE(temp, 0)] & 0x000000ff);
1792             rk[13] = rk[ 5] ^ rk[12];
1793             rk[14] = rk[ 6] ^ rk[13];
1794             rk[15] = rk[ 7] ^ rk[14];
1795
1796             rk += 8;
1797         }
1798         break;
1799
1800     default:
1801         return BAD_FUNC_ARG;
1802     }
1803
1804     if (dir == AES_DECRYPTION)
1805     {
1806         unsigned int j;
1807         rk = aes->key;
1808
1809         /* invert the order of the round keys: */
1810         for (i = 0, j = 4* aes->rounds; i < j; i += 4, j -= 4) {
1811             temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1812             temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1813             temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1814             temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1815         }
1816         /* apply the inverse MixColumn transform to all round keys but the
1817            first and the last: */
1818         for (i = 1; i < aes->rounds; i++) {
1819             rk += 4;
1820             rk[0] =
1821                 Td[0][Te[4][GETBYTE(rk[0], 3)] & 0xff] ^
1822                 Td[1][Te[4][GETBYTE(rk[0], 2)] & 0xff] ^
1823                 Td[2][Te[4][GETBYTE(rk[0], 1)] & 0xff] ^
1824                 Td[3][Te[4][GETBYTE(rk[0], 0)] & 0xff];
1825             rk[1] =
1826                 Td[0][Te[4][GETBYTE(rk[1], 3)] & 0xff] ^
1827                 Td[1][Te[4][GETBYTE(rk[1], 2)] & 0xff] ^
1828                 Td[2][Te[4][GETBYTE(rk[1], 1)] & 0xff] ^
1829                 Td[3][Te[4][GETBYTE(rk[1], 0)] & 0xff];
1830             rk[2] =
1831                 Td[0][Te[4][GETBYTE(rk[2], 3)] & 0xff] ^
1832                 Td[1][Te[4][GETBYTE(rk[2], 2)] & 0xff] ^
1833                 Td[2][Te[4][GETBYTE(rk[2], 1)] & 0xff] ^
1834                 Td[3][Te[4][GETBYTE(rk[2], 0)] & 0xff];
1835             rk[3] =
1836                 Td[0][Te[4][GETBYTE(rk[3], 3)] & 0xff] ^
1837                 Td[1][Te[4][GETBYTE(rk[3], 2)] & 0xff] ^
1838                 Td[2][Te[4][GETBYTE(rk[3], 1)] & 0xff] ^
1839                 Td[3][Te[4][GETBYTE(rk[3], 0)] & 0xff];
1840         }
1841     }
1842
1843     return AesSetIV(aes, iv);
1844 }
1845
1846
1847 int AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv,
1848               int dir)
1849 {
1850
1851     if (!((keylen == 16) || (keylen == 24) || (keylen == 32)))
1852         return BAD_FUNC_ARG;
1853
1854 #ifdef HAVE_CAVIUM
1855     if (aes->magic == CYASSL_AES_CAVIUM_MAGIC)
1856         return AesCaviumSetKey(aes, userKey, keylen, iv);
1857 #endif
1858
1859 #ifdef CYASSL_AESNI
1860     if (checkAESNI == 0) {
1861         haveAESNI  = Check_CPU_support_AES();
1862         checkAESNI = 1;
1863     }
1864     if (haveAESNI) {
1865         aes->use_aesni = 1;
1866         if (iv)
1867             XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
1868         if (dir == AES_ENCRYPTION)
1869             return AES_set_encrypt_key(userKey, keylen * 8, aes);
1870         else
1871             return AES_set_decrypt_key(userKey, keylen * 8, aes);
1872     }
1873 #endif /* CYASSL_AESNI */
1874
1875     return AesSetKeyLocal(aes, userKey, keylen, iv, dir);
1876 }
1877
1878
1879 static void AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock)
1880 {
1881     word32 s0, s1, s2, s3;
1882     word32 t0, t1, t2, t3;
1883     word32 r = aes->rounds >> 1;
1884
1885     const word32* rk = aes->key;
1886     if (r > 7 || r == 0) {
1887         CYASSL_MSG("AesEncrypt encountered improper key, set it up");
1888         return;  /* stop instead of segfaulting, set up your keys! */
1889     }
1890 #ifdef CYASSL_AESNI
1891     if (haveAESNI && aes->use_aesni) {
1892         #ifdef DEBUG_AESNI
1893             printf("about to aes encrypt\n");
1894             printf("in  = %p\n", inBlock);
1895             printf("out = %p\n", outBlock);
1896             printf("aes->key = %p\n", aes->key);
1897             printf("aes->rounds = %d\n", aes->rounds);
1898             printf("sz = %d\n", AES_BLOCK_SIZE);
1899         #endif
1900
1901         /* check alignment, decrypt doesn't need alignment */
1902         if ((word)inBlock % 16) {
1903         #ifndef NO_CYASSL_ALLOC_ALIGN
1904             byte* tmp = (byte*)XMALLOC(AES_BLOCK_SIZE, NULL,
1905                                                       DYNAMIC_TYPE_TMP_BUFFER);
1906             if (tmp == NULL) return;
1907
1908             XMEMCPY(tmp, inBlock, AES_BLOCK_SIZE);
1909             AES_ECB_encrypt(tmp, tmp, AES_BLOCK_SIZE, (byte*)aes->key,
1910                             aes->rounds);
1911             XMEMCPY(outBlock, tmp, AES_BLOCK_SIZE);
1912             XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
1913             return;
1914         #else
1915             CYASSL_MSG("AES-ECB encrypt with bad alignment");
1916             return;
1917         #endif
1918         }
1919
1920         AES_ECB_encrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
1921                         aes->rounds);
1922
1923         return;
1924     }
1925     else {
1926         #ifdef DEBUG_AESNI
1927             printf("Skipping AES-NI\n");
1928         #endif
1929     }
1930 #endif
1931
1932     /*
1933      * map byte array block to cipher state
1934      * and add initial round key:
1935      */
1936     XMEMCPY(&s0, inBlock,                  sizeof(s0));
1937     XMEMCPY(&s1, inBlock + sizeof(s0),     sizeof(s1));
1938     XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
1939     XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));
1940
1941     #ifdef LITTLE_ENDIAN_ORDER
1942         s0 = ByteReverseWord32(s0);
1943         s1 = ByteReverseWord32(s1);
1944         s2 = ByteReverseWord32(s2);
1945         s3 = ByteReverseWord32(s3);
1946     #endif
1947
1948     s0 ^= rk[0];
1949     s1 ^= rk[1];
1950     s2 ^= rk[2];
1951     s3 ^= rk[3];
1952    
1953     /*
1954      * Nr - 1 full rounds:
1955      */
1956
1957     for (;;) {
1958         t0 =
1959             Te[0][GETBYTE(s0, 3)]  ^
1960             Te[1][GETBYTE(s1, 2)]  ^
1961             Te[2][GETBYTE(s2, 1)]  ^
1962             Te[3][GETBYTE(s3, 0)]  ^
1963             rk[4];
1964         t1 =
1965             Te[0][GETBYTE(s1, 3)]  ^
1966             Te[1][GETBYTE(s2, 2)]  ^
1967             Te[2][GETBYTE(s3, 1)]  ^
1968             Te[3][GETBYTE(s0, 0)]  ^
1969             rk[5];
1970         t2 =
1971             Te[0][GETBYTE(s2, 3)] ^
1972             Te[1][GETBYTE(s3, 2)]  ^
1973             Te[2][GETBYTE(s0, 1)]  ^
1974             Te[3][GETBYTE(s1, 0)]  ^
1975             rk[6];
1976         t3 =
1977             Te[0][GETBYTE(s3, 3)] ^
1978             Te[1][GETBYTE(s0, 2)]  ^
1979             Te[2][GETBYTE(s1, 1)]  ^
1980             Te[3][GETBYTE(s2, 0)]  ^
1981             rk[7];
1982
1983         rk += 8;
1984         if (--r == 0) {
1985             break;
1986         }
1987         
1988         s0 =
1989             Te[0][GETBYTE(t0, 3)] ^
1990             Te[1][GETBYTE(t1, 2)] ^
1991             Te[2][GETBYTE(t2, 1)] ^
1992             Te[3][GETBYTE(t3, 0)] ^
1993             rk[0];
1994         s1 =
1995             Te[0][GETBYTE(t1, 3)] ^
1996             Te[1][GETBYTE(t2, 2)] ^
1997             Te[2][GETBYTE(t3, 1)] ^
1998             Te[3][GETBYTE(t0, 0)] ^
1999             rk[1];
2000         s2 =
2001             Te[0][GETBYTE(t2, 3)] ^
2002             Te[1][GETBYTE(t3, 2)] ^
2003             Te[2][GETBYTE(t0, 1)] ^
2004             Te[3][GETBYTE(t1, 0)] ^
2005             rk[2];
2006         s3 =
2007             Te[0][GETBYTE(t3, 3)] ^
2008             Te[1][GETBYTE(t0, 2)] ^
2009             Te[2][GETBYTE(t1, 1)] ^
2010             Te[3][GETBYTE(t2, 0)] ^
2011             rk[3];
2012     }
2013
2014     /*
2015      * apply last round and
2016      * map cipher state to byte array block:
2017      */
2018
2019     s0 =
2020         (Te[4][GETBYTE(t0, 3)] & 0xff000000) ^
2021         (Te[4][GETBYTE(t1, 2)] & 0x00ff0000) ^
2022         (Te[4][GETBYTE(t2, 1)] & 0x0000ff00) ^
2023         (Te[4][GETBYTE(t3, 0)] & 0x000000ff) ^
2024         rk[0];
2025     s1 =
2026         (Te[4][GETBYTE(t1, 3)] & 0xff000000) ^
2027         (Te[4][GETBYTE(t2, 2)] & 0x00ff0000) ^
2028         (Te[4][GETBYTE(t3, 1)] & 0x0000ff00) ^
2029         (Te[4][GETBYTE(t0, 0)] & 0x000000ff) ^
2030         rk[1];
2031     s2 =
2032         (Te[4][GETBYTE(t2, 3)] & 0xff000000) ^
2033         (Te[4][GETBYTE(t3, 2)] & 0x00ff0000) ^
2034         (Te[4][GETBYTE(t0, 1)] & 0x0000ff00) ^
2035         (Te[4][GETBYTE(t1, 0)] & 0x000000ff) ^
2036         rk[2];
2037     s3 =
2038         (Te[4][GETBYTE(t3, 3)] & 0xff000000) ^
2039         (Te[4][GETBYTE(t0, 2)] & 0x00ff0000) ^
2040         (Te[4][GETBYTE(t1, 1)] & 0x0000ff00) ^
2041         (Te[4][GETBYTE(t2, 0)] & 0x000000ff) ^
2042         rk[3];
2043
2044     /* write out */
2045     #ifdef LITTLE_ENDIAN_ORDER
2046         s0 = ByteReverseWord32(s0);
2047         s1 = ByteReverseWord32(s1);
2048         s2 = ByteReverseWord32(s2);
2049         s3 = ByteReverseWord32(s3);
2050     #endif
2051
2052     XMEMCPY(outBlock,                  &s0, sizeof(s0));
2053     XMEMCPY(outBlock + sizeof(s0),     &s1, sizeof(s1));
2054     XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2));
2055     XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3));
2056 }
2057
2058
2059 static void AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock)
2060 {
2061     word32 s0, s1, s2, s3;
2062     word32 t0, t1, t2, t3;
2063     word32 r = aes->rounds >> 1;
2064
2065     const word32* rk = aes->key;
2066     if (r > 7 || r == 0) {
2067         CYASSL_MSG("AesDecrypt encountered improper key, set it up");
2068         return;  /* stop instead of segfaulting, set up your keys! */
2069     }
2070 #ifdef CYASSL_AESNI
2071     if (haveAESNI && aes->use_aesni) {
2072         #ifdef DEBUG_AESNI
2073             printf("about to aes decrypt\n");
2074             printf("in  = %p\n", inBlock);
2075             printf("out = %p\n", outBlock);
2076             printf("aes->key = %p\n", aes->key);
2077             printf("aes->rounds = %d\n", aes->rounds);
2078             printf("sz = %d\n", AES_BLOCK_SIZE);
2079         #endif
2080
2081         /* if input and output same will overwrite input iv */
2082         XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE);
2083         AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key,
2084                         aes->rounds);
2085         return;
2086     }
2087     else {
2088         #ifdef DEBUG_AESNI
2089             printf("Skipping AES-NI\n");
2090         #endif
2091     }
2092 #endif
2093
2094     /*
2095      * map byte array block to cipher state
2096      * and add initial round key:
2097      */
2098     XMEMCPY(&s0, inBlock,                  sizeof(s0));
2099     XMEMCPY(&s1, inBlock + sizeof(s0),     sizeof(s1));
2100     XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2));
2101     XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3));
2102
2103     #ifdef LITTLE_ENDIAN_ORDER
2104         s0 = ByteReverseWord32(s0);
2105         s1 = ByteReverseWord32(s1);
2106         s2 = ByteReverseWord32(s2);
2107         s3 = ByteReverseWord32(s3);
2108     #endif
2109
2110     s0 ^= rk[0];
2111     s1 ^= rk[1];
2112     s2 ^= rk[2];
2113     s3 ^= rk[3];
2114    
2115     /*
2116      * Nr - 1 full rounds:
2117      */
2118
2119     for (;;) {
2120         t0 =
2121             Td[0][GETBYTE(s0, 3)] ^
2122             Td[1][GETBYTE(s3, 2)] ^
2123             Td[2][GETBYTE(s2, 1)] ^
2124             Td[3][GETBYTE(s1, 0)] ^
2125             rk[4];
2126         t1 =
2127             Td[0][GETBYTE(s1, 3)] ^
2128             Td[1][GETBYTE(s0, 2)] ^
2129             Td[2][GETBYTE(s3, 1)] ^
2130             Td[3][GETBYTE(s2, 0)] ^
2131             rk[5];
2132         t2 =
2133             Td[0][GETBYTE(s2, 3)] ^
2134             Td[1][GETBYTE(s1, 2)] ^
2135             Td[2][GETBYTE(s0, 1)] ^
2136             Td[3][GETBYTE(s3, 0)] ^
2137             rk[6];
2138         t3 =
2139             Td[0][GETBYTE(s3, 3)] ^
2140             Td[1][GETBYTE(s2, 2)] ^
2141             Td[2][GETBYTE(s1, 1)] ^
2142             Td[3][GETBYTE(s0, 0)] ^
2143             rk[7];
2144
2145         rk += 8;
2146         if (--r == 0) {
2147             break;
2148         }
2149
2150         s0 =
2151             Td[0][GETBYTE(t0, 3)] ^
2152             Td[1][GETBYTE(t3, 2)] ^
2153             Td[2][GETBYTE(t2, 1)] ^
2154             Td[3][GETBYTE(t1, 0)] ^
2155             rk[0];
2156         s1 =
2157             Td[0][GETBYTE(t1, 3)] ^
2158             Td[1][GETBYTE(t0, 2)] ^
2159             Td[2][GETBYTE(t3, 1)] ^
2160             Td[3][GETBYTE(t2, 0)] ^
2161             rk[1];
2162         s2 =
2163             Td[0][GETBYTE(t2, 3)] ^
2164             Td[1][GETBYTE(t1, 2)] ^
2165             Td[2][GETBYTE(t0, 1)] ^
2166             Td[3][GETBYTE(t3, 0)] ^
2167             rk[2];
2168         s3 =
2169             Td[0][GETBYTE(t3, 3)] ^
2170             Td[1][GETBYTE(t2, 2)] ^
2171             Td[2][GETBYTE(t1, 1)] ^
2172             Td[3][GETBYTE(t0, 0)] ^
2173             rk[3];
2174     }
2175     /*
2176      * apply last round and
2177      * map cipher state to byte array block:
2178      */
2179     s0 =
2180         (Td[4][GETBYTE(t0, 3)] & 0xff000000) ^
2181         (Td[4][GETBYTE(t3, 2)] & 0x00ff0000) ^
2182         (Td[4][GETBYTE(t2, 1)] & 0x0000ff00) ^
2183         (Td[4][GETBYTE(t1, 0)] & 0x000000ff) ^
2184         rk[0];
2185     s1 =
2186         (Td[4][GETBYTE(t1, 3)] & 0xff000000) ^
2187         (Td[4][GETBYTE(t0, 2)] & 0x00ff0000) ^
2188         (Td[4][GETBYTE(t3, 1)] & 0x0000ff00) ^
2189         (Td[4][GETBYTE(t2, 0)] & 0x000000ff) ^
2190         rk[1];
2191     s2 =
2192         (Td[4][GETBYTE(t2, 3)] & 0xff000000) ^
2193         (Td[4][GETBYTE(t1, 2)] & 0x00ff0000) ^
2194         (Td[4][GETBYTE(t0, 1)] & 0x0000ff00) ^
2195         (Td[4][GETBYTE(t3, 0)] & 0x000000ff) ^
2196         rk[2];
2197     s3 =
2198         (Td[4][GETBYTE(t3, 3)] & 0xff000000) ^
2199         (Td[4][GETBYTE(t2, 2)] & 0x00ff0000) ^
2200         (Td[4][GETBYTE(t1, 1)] & 0x0000ff00) ^
2201         (Td[4][GETBYTE(t0, 0)] & 0x000000ff) ^
2202         rk[3];
2203
2204     /* write out */
2205     #ifdef LITTLE_ENDIAN_ORDER
2206         s0 = ByteReverseWord32(s0);
2207         s1 = ByteReverseWord32(s1);
2208         s2 = ByteReverseWord32(s2);
2209         s3 = ByteReverseWord32(s3);
2210     #endif
2211
2212     XMEMCPY(outBlock,                  &s0, sizeof(s0));
2213     XMEMCPY(outBlock + sizeof(s0),     &s1, sizeof(s1));
2214     XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2));
2215     XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3));
2216 }
2217
2218 #ifndef HAVE_AES_ENGINE
2219 int AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
2220 {
2221     word32 blocks = sz / AES_BLOCK_SIZE;
2222
2223 #ifdef HAVE_CAVIUM
2224     if (aes->magic == CYASSL_AES_CAVIUM_MAGIC)
2225         return AesCaviumCbcEncrypt(aes, out, in, sz);
2226 #endif
2227
2228 #ifdef CYASSL_AESNI
2229     if (haveAESNI) {
2230         #ifdef DEBUG_AESNI
2231             printf("about to aes cbc encrypt\n");
2232             printf("in  = %p\n", in);
2233             printf("out = %p\n", out);
2234             printf("aes->key = %p\n", aes->key);
2235             printf("aes->reg = %p\n", aes->reg);
2236             printf("aes->rounds = %d\n", aes->rounds);
2237             printf("sz = %d\n", sz);
2238         #endif
2239
2240         /* check alignment, decrypt doesn't need alignment */
2241         if ((word)in % 16) {
2242         #ifndef NO_CYASSL_ALLOC_ALIGN
2243             byte* tmp = (byte*)XMALLOC(sz, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2244             CYASSL_MSG("AES-CBC encrypt with bad alignment");
2245             if (tmp == NULL) return MEMORY_E;
2246
2247             XMEMCPY(tmp, in, sz);
2248             AES_CBC_encrypt(tmp, tmp, (byte*)aes->reg, sz, (byte*)aes->key,
2249                         aes->rounds);
2250             /* store iv for next call */
2251             XMEMCPY(aes->reg, tmp + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
2252
2253             XMEMCPY(out, tmp, sz);
2254             XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER);
2255             return 0;
2256         #else
2257             return BAD_ALIGN_E;
2258         #endif
2259         }
2260
2261         AES_CBC_encrypt(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
2262                         aes->rounds);
2263         /* store iv for next call */
2264         XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
2265
2266         return 0;
2267     }
2268 #endif
2269
2270     while (blocks--) {
2271         xorbuf((byte*)aes->reg, in, AES_BLOCK_SIZE);
2272         AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->reg);
2273         XMEMCPY(out, aes->reg, AES_BLOCK_SIZE);
2274
2275         out += AES_BLOCK_SIZE;
2276         in  += AES_BLOCK_SIZE; 
2277     }
2278
2279     return 0;
2280 }
2281
2282
2283 int AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz)
2284 {
2285     word32 blocks = sz / AES_BLOCK_SIZE;
2286
2287 #ifdef HAVE_CAVIUM
2288     if (aes->magic == CYASSL_AES_CAVIUM_MAGIC)
2289         return AesCaviumCbcDecrypt(aes, out, in, sz);
2290 #endif
2291
2292 #ifdef CYASSL_AESNI
2293     if (haveAESNI) {
2294         #ifdef DEBUG_AESNI
2295             printf("about to aes cbc decrypt\n");
2296             printf("in  = %p\n", in);
2297             printf("out = %p\n", out);
2298             printf("aes->key = %p\n", aes->key);
2299             printf("aes->reg = %p\n", aes->reg);
2300             printf("aes->rounds = %d\n", aes->rounds);
2301             printf("sz = %d\n", sz);
2302         #endif
2303
2304         /* if input and output same will overwrite input iv */
2305         XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
2306         AES_CBC_decrypt(in, out, (byte*)aes->reg, sz, (byte*)aes->key,
2307                         aes->rounds);
2308         /* store iv for next call */
2309         XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
2310         return 0;
2311     }
2312 #endif
2313
2314     while (blocks--) {
2315         XMEMCPY(aes->tmp, in, AES_BLOCK_SIZE);
2316         AesDecrypt(aes, (byte*)aes->tmp, out);
2317         xorbuf(out, (byte*)aes->reg, AES_BLOCK_SIZE);
2318         XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
2319
2320         out += AES_BLOCK_SIZE;
2321         in  += AES_BLOCK_SIZE; 
2322     }
2323
2324     return 0;
2325 }
2326 #endif
2327
2328 #ifdef CYASSL_AES_DIRECT
2329
2330 /* Allow direct access to one block encrypt */
2331 void AesEncryptDirect(Aes* aes, byte* out, const byte* in)
2332 {
2333     return AesEncrypt(aes, in, out);
2334 }
2335
2336
2337 /* Allow direct access to one block decrypt */
2338 void AesDecryptDirect(Aes* aes, byte* out, const byte* in)
2339 {
2340     return AesDecrypt(aes, in, out);
2341 }
2342
2343
2344 #endif /* CYASSL_AES_DIRECT */
2345
2346
2347 #if defined(CYASSL_AES_DIRECT) || defined(CYASSL_AES_COUNTER)
2348
2349 /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */
2350 int AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen,
2351                     const byte* iv, int dir)
2352 {
2353     return AesSetKeyLocal(aes, userKey, keylen, iv, dir);
2354 }
2355
2356 #endif /* CYASSL_AES_DIRECT || CYASSL_AES_COUNTER */
2357
2358
2359 #if defined(CYASSL_AES_COUNTER) && !defined(HAVE_AES_ENGINE)
2360
2361 /* Increment AES counter */
2362 static INLINE void IncrementAesCounter(byte* inOutCtr)
2363 {
2364     int i;
2365
2366     /* in network byte order so start at end and work back */
2367     for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) {
2368         if (++inOutCtr[i])  /* we're done unless we overflow */
2369             return;
2370     }
2371 }
2372   
2373
2374 void AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz)
2375 {
2376     byte* tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left;
2377
2378     /* consume any unused bytes left in aes->tmp */
2379     while (aes->left && sz) {
2380        *(out++) = *(in++) ^ *(tmp++);
2381        aes->left--;
2382        sz--;
2383     }
2384
2385     /* do as many block size ops as possible */
2386     while (sz >= AES_BLOCK_SIZE) {
2387         AesEncrypt(aes, (byte*)aes->reg, out);
2388         IncrementAesCounter((byte*)aes->reg);
2389         xorbuf(out, in, AES_BLOCK_SIZE);
2390
2391         out += AES_BLOCK_SIZE;
2392         in  += AES_BLOCK_SIZE;
2393         sz  -= AES_BLOCK_SIZE;
2394         aes->left = 0;
2395     }
2396
2397     /* handle non block size remaining and sotre unused byte count in left */
2398     if (sz) {
2399         AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp);
2400         IncrementAesCounter((byte*)aes->reg);
2401
2402         aes->left = AES_BLOCK_SIZE;
2403         tmp = (byte*)aes->tmp;
2404
2405         while (sz--) {
2406             *(out++) = *(in++) ^ *(tmp++);
2407             aes->left--;
2408         }
2409     }
2410 }
2411
2412 #endif /* CYASSL_AES_COUNTER */
2413
2414
2415 #ifdef HAVE_AESGCM
2416
2417 /*
2418  * The IV for AES GCM, stored in struct Aes's member reg, is comprised of
2419  * three parts in order:
2420  *   1. The implicit IV. This is generated from the PRF using the shared
2421  *      secrets between endpoints. It is 4 bytes long.
2422  *   2. The explicit IV. This is set by the user of the AES. It needs to be
2423  *      unique for each call to encrypt. The explicit IV is shared with the
2424  *      other end of the transaction in the clear.
2425  *   3. The counter. Each block of data is encrypted with its own sequence
2426  *      number counter.
2427  */
2428
/* number of trailing bytes of the GCM counter block that get incremented */
enum { CTR_SZ = 4 };
2432
2433
2434 static INLINE void InitGcmCounter(byte* inOutCtr)
2435 {
2436     inOutCtr[AES_BLOCK_SIZE - 4] = 0;
2437     inOutCtr[AES_BLOCK_SIZE - 3] = 0;
2438     inOutCtr[AES_BLOCK_SIZE - 2] = 0;
2439     inOutCtr[AES_BLOCK_SIZE - 1] = 1;
2440 }
2441
2442
2443 static INLINE void IncrementGcmCounter(byte* inOutCtr)
2444 {
2445     int i;
2446
2447     /* in network byte order so start at end and work back */
2448     for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) {
2449         if (++inOutCtr[i])  /* we're done unless we overflow */
2450             return;
2451     }
2452 }
2453
2454
2455 #if defined(GCM_SMALL) || defined(GCM_TABLE)
2456
2457 static INLINE void FlattenSzInBits(byte* buf, word32 sz)
2458 {
2459     /* Multiply the sz by 8 */
2460     word32 szHi = (sz >> (8*sizeof(sz) - 3));
2461     sz <<= 3;
2462
2463     /* copy over the words of the sz into the destination buffer */
2464     buf[0] = (szHi >> 24) & 0xff;
2465     buf[1] = (szHi >> 16) & 0xff;
2466     buf[2] = (szHi >>  8) & 0xff;
2467     buf[3] = szHi & 0xff;
2468     buf[4] = (sz >> 24) & 0xff;
2469     buf[5] = (sz >> 16) & 0xff;
2470     buf[6] = (sz >>  8) & 0xff;
2471     buf[7] = sz & 0xff;
2472 }
2473
2474
2475 static INLINE void RIGHTSHIFTX(byte* x)
2476 {
2477     int i;
2478     int carryOut = 0;
2479     int carryIn = 0;
2480     int borrow = x[15] & 0x01;
2481
2482     for (i = 0; i < AES_BLOCK_SIZE; i++) {
2483         carryOut = x[i] & 0x01;
2484         x[i] = (x[i] >> 1) | (carryIn ? 0x80 : 0);
2485         carryIn = carryOut;
2486     }
2487     if (borrow) x[0] ^= 0xE1;
2488 }
2489
2490 #endif /* defined(GCM_SMALL) || defined(GCM_TABLE) */
2491
2492
2493 #ifdef GCM_TABLE
2494
2495 static void GenerateM0(Aes* aes)
2496 {
2497     int i, j;
2498     byte (*m)[AES_BLOCK_SIZE] = aes->M0;
2499
2500     XMEMCPY(m[128], aes->H, AES_BLOCK_SIZE);
2501
2502     for (i = 64; i > 0; i /= 2) {
2503         XMEMCPY(m[i], m[i*2], AES_BLOCK_SIZE);
2504         RIGHTSHIFTX(m[i]);
2505     }
2506
2507     for (i = 2; i < 256; i *= 2) {
2508         for (j = 1; j < i; j++) {
2509             XMEMCPY(m[i+j], m[i], AES_BLOCK_SIZE);
2510             xorbuf(m[i+j], m[j], AES_BLOCK_SIZE);
2511         }
2512     }
2513
2514     XMEMSET(m[0], 0, AES_BLOCK_SIZE);
2515 }
2516
2517 #endif /* GCM_TABLE */
2518
2519
2520 void AesGcmSetKey(Aes* aes, const byte* key, word32 len)
2521 {
2522     byte iv[AES_BLOCK_SIZE];
2523
2524     if (!((len == 16) || (len == 24) || (len == 32)))
2525         return;
2526
2527     XMEMSET(iv, 0, AES_BLOCK_SIZE);
2528     AesSetKey(aes, key, len, iv, AES_ENCRYPTION);
2529
2530     AesEncrypt(aes, iv, aes->H);
2531 #ifdef GCM_TABLE
2532     GenerateM0(aes);
2533 #endif /* GCM_TABLE */
2534 }
2535
2536
2537 #if defined(GCM_SMALL)
2538
2539 static void GMULT(byte* X, byte* Y)
2540 {
2541     byte Z[AES_BLOCK_SIZE];
2542     byte V[AES_BLOCK_SIZE];
2543     int i, j;
2544
2545     XMEMSET(Z, 0, AES_BLOCK_SIZE);
2546     XMEMCPY(V, X, AES_BLOCK_SIZE);
2547     for (i = 0; i < AES_BLOCK_SIZE; i++)
2548     {
2549         byte y = Y[i];
2550         for (j = 0; j < 8; j++)
2551         {
2552             if (y & 0x80) {
2553                 xorbuf(Z, V, AES_BLOCK_SIZE);
2554             }
2555
2556             RIGHTSHIFTX(V);
2557             y = y << 1;
2558         }
2559     }
2560     XMEMCPY(X, Z, AES_BLOCK_SIZE);
2561 }
2562
2563
2564 static void GHASH(Aes* aes, const byte* a, word32 aSz,
2565                                 const byte* c, word32 cSz, byte* s, word32 sSz)
2566 {
2567     byte x[AES_BLOCK_SIZE];
2568     byte scratch[AES_BLOCK_SIZE];
2569     word32 blocks, partial;
2570     byte* h = aes->H;
2571
2572     XMEMSET(x, 0, AES_BLOCK_SIZE);
2573
2574     /* Hash in A, the Additional Authentication Data */
2575     if (aSz != 0 && a != NULL) {
2576         blocks = aSz / AES_BLOCK_SIZE;
2577         partial = aSz % AES_BLOCK_SIZE;
2578         while (blocks--) {
2579             xorbuf(x, a, AES_BLOCK_SIZE);
2580             GMULT(x, h);
2581             a += AES_BLOCK_SIZE;
2582         }
2583         if (partial != 0) {
2584             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2585             XMEMCPY(scratch, a, partial);
2586             xorbuf(x, scratch, AES_BLOCK_SIZE);
2587             GMULT(x, h);
2588         }
2589     }
2590
2591     /* Hash in C, the Ciphertext */
2592     if (cSz != 0 && c != NULL) {
2593         blocks = cSz / AES_BLOCK_SIZE;
2594         partial = cSz % AES_BLOCK_SIZE;
2595         while (blocks--) {
2596             xorbuf(x, c, AES_BLOCK_SIZE);
2597             GMULT(x, h);
2598             c += AES_BLOCK_SIZE;
2599         }
2600         if (partial != 0) {
2601             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2602             XMEMCPY(scratch, c, partial);
2603             xorbuf(x, scratch, AES_BLOCK_SIZE);
2604             GMULT(x, h);
2605         }
2606     }
2607
2608     /* Hash in the lengths of A and C in bits */
2609     FlattenSzInBits(&scratch[0], aSz);
2610     FlattenSzInBits(&scratch[8], cSz);
2611     xorbuf(x, scratch, AES_BLOCK_SIZE);
2612     GMULT(x, h);
2613
2614     /* Copy the result into s. */
2615     XMEMCPY(s, x, sSz);
2616 }
2617
2618 /* end GCM_SMALL */
2619 #elif defined(GCM_TABLE)
2620
2621 static const byte R[256][2] = {
2622     {0x00, 0x00}, {0x01, 0xc2}, {0x03, 0x84}, {0x02, 0x46},
2623     {0x07, 0x08}, {0x06, 0xca}, {0x04, 0x8c}, {0x05, 0x4e},
2624     {0x0e, 0x10}, {0x0f, 0xd2}, {0x0d, 0x94}, {0x0c, 0x56},
2625     {0x09, 0x18}, {0x08, 0xda}, {0x0a, 0x9c}, {0x0b, 0x5e},
2626     {0x1c, 0x20}, {0x1d, 0xe2}, {0x1f, 0xa4}, {0x1e, 0x66},
2627     {0x1b, 0x28}, {0x1a, 0xea}, {0x18, 0xac}, {0x19, 0x6e},
2628     {0x12, 0x30}, {0x13, 0xf2}, {0x11, 0xb4}, {0x10, 0x76},
2629     {0x15, 0x38}, {0x14, 0xfa}, {0x16, 0xbc}, {0x17, 0x7e},
2630     {0x38, 0x40}, {0x39, 0x82}, {0x3b, 0xc4}, {0x3a, 0x06},
2631     {0x3f, 0x48}, {0x3e, 0x8a}, {0x3c, 0xcc}, {0x3d, 0x0e},
2632     {0x36, 0x50}, {0x37, 0x92}, {0x35, 0xd4}, {0x34, 0x16},
2633     {0x31, 0x58}, {0x30, 0x9a}, {0x32, 0xdc}, {0x33, 0x1e},
2634     {0x24, 0x60}, {0x25, 0xa2}, {0x27, 0xe4}, {0x26, 0x26},
2635     {0x23, 0x68}, {0x22, 0xaa}, {0x20, 0xec}, {0x21, 0x2e},
2636     {0x2a, 0x70}, {0x2b, 0xb2}, {0x29, 0xf4}, {0x28, 0x36},
2637     {0x2d, 0x78}, {0x2c, 0xba}, {0x2e, 0xfc}, {0x2f, 0x3e},
2638     {0x70, 0x80}, {0x71, 0x42}, {0x73, 0x04}, {0x72, 0xc6},
2639     {0x77, 0x88}, {0x76, 0x4a}, {0x74, 0x0c}, {0x75, 0xce},
2640     {0x7e, 0x90}, {0x7f, 0x52}, {0x7d, 0x14}, {0x7c, 0xd6},
2641     {0x79, 0x98}, {0x78, 0x5a}, {0x7a, 0x1c}, {0x7b, 0xde},
2642     {0x6c, 0xa0}, {0x6d, 0x62}, {0x6f, 0x24}, {0x6e, 0xe6},
2643     {0x6b, 0xa8}, {0x6a, 0x6a}, {0x68, 0x2c}, {0x69, 0xee},
2644     {0x62, 0xb0}, {0x63, 0x72}, {0x61, 0x34}, {0x60, 0xf6},
2645     {0x65, 0xb8}, {0x64, 0x7a}, {0x66, 0x3c}, {0x67, 0xfe},
2646     {0x48, 0xc0}, {0x49, 0x02}, {0x4b, 0x44}, {0x4a, 0x86},
2647     {0x4f, 0xc8}, {0x4e, 0x0a}, {0x4c, 0x4c}, {0x4d, 0x8e},
2648     {0x46, 0xd0}, {0x47, 0x12}, {0x45, 0x54}, {0x44, 0x96},
2649     {0x41, 0xd8}, {0x40, 0x1a}, {0x42, 0x5c}, {0x43, 0x9e},
2650     {0x54, 0xe0}, {0x55, 0x22}, {0x57, 0x64}, {0x56, 0xa6},
2651     {0x53, 0xe8}, {0x52, 0x2a}, {0x50, 0x6c}, {0x51, 0xae},
2652     {0x5a, 0xf0}, {0x5b, 0x32}, {0x59, 0x74}, {0x58, 0xb6},
2653     {0x5d, 0xf8}, {0x5c, 0x3a}, {0x5e, 0x7c}, {0x5f, 0xbe},
2654     {0xe1, 0x00}, {0xe0, 0xc2}, {0xe2, 0x84}, {0xe3, 0x46},
2655     {0xe6, 0x08}, {0xe7, 0xca}, {0xe5, 0x8c}, {0xe4, 0x4e},
2656     {0xef, 0x10}, {0xee, 0xd2}, {0xec, 0x94}, {0xed, 0x56},
2657     {0xe8, 0x18}, {0xe9, 0xda}, {0xeb, 0x9c}, {0xea, 0x5e},
2658     {0xfd, 0x20}, {0xfc, 0xe2}, {0xfe, 0xa4}, {0xff, 0x66},
2659     {0xfa, 0x28}, {0xfb, 0xea}, {0xf9, 0xac}, {0xf8, 0x6e},
2660     {0xf3, 0x30}, {0xf2, 0xf2}, {0xf0, 0xb4}, {0xf1, 0x76},
2661     {0xf4, 0x38}, {0xf5, 0xfa}, {0xf7, 0xbc}, {0xf6, 0x7e},
2662     {0xd9, 0x40}, {0xd8, 0x82}, {0xda, 0xc4}, {0xdb, 0x06},
2663     {0xde, 0x48}, {0xdf, 0x8a}, {0xdd, 0xcc}, {0xdc, 0x0e},
2664     {0xd7, 0x50}, {0xd6, 0x92}, {0xd4, 0xd4}, {0xd5, 0x16},
2665     {0xd0, 0x58}, {0xd1, 0x9a}, {0xd3, 0xdc}, {0xd2, 0x1e},
2666     {0xc5, 0x60}, {0xc4, 0xa2}, {0xc6, 0xe4}, {0xc7, 0x26},
2667     {0xc2, 0x68}, {0xc3, 0xaa}, {0xc1, 0xec}, {0xc0, 0x2e},
2668     {0xcb, 0x70}, {0xca, 0xb2}, {0xc8, 0xf4}, {0xc9, 0x36},
2669     {0xcc, 0x78}, {0xcd, 0xba}, {0xcf, 0xfc}, {0xce, 0x3e},
2670     {0x91, 0x80}, {0x90, 0x42}, {0x92, 0x04}, {0x93, 0xc6},
2671     {0x96, 0x88}, {0x97, 0x4a}, {0x95, 0x0c}, {0x94, 0xce},
2672     {0x9f, 0x90}, {0x9e, 0x52}, {0x9c, 0x14}, {0x9d, 0xd6},
2673     {0x98, 0x98}, {0x99, 0x5a}, {0x9b, 0x1c}, {0x9a, 0xde},
2674     {0x8d, 0xa0}, {0x8c, 0x62}, {0x8e, 0x24}, {0x8f, 0xe6},
2675     {0x8a, 0xa8}, {0x8b, 0x6a}, {0x89, 0x2c}, {0x88, 0xee},
2676     {0x83, 0xb0}, {0x82, 0x72}, {0x80, 0x34}, {0x81, 0xf6},
2677     {0x84, 0xb8}, {0x85, 0x7a}, {0x87, 0x3c}, {0x86, 0xfe},
2678     {0xa9, 0xc0}, {0xa8, 0x02}, {0xaa, 0x44}, {0xab, 0x86},
2679     {0xae, 0xc8}, {0xaf, 0x0a}, {0xad, 0x4c}, {0xac, 0x8e},
2680     {0xa7, 0xd0}, {0xa6, 0x12}, {0xa4, 0x54}, {0xa5, 0x96},
2681     {0xa0, 0xd8}, {0xa1, 0x1a}, {0xa3, 0x5c}, {0xa2, 0x9e},
2682     {0xb5, 0xe0}, {0xb4, 0x22}, {0xb6, 0x64}, {0xb7, 0xa6},
2683     {0xb2, 0xe8}, {0xb3, 0x2a}, {0xb1, 0x6c}, {0xb0, 0xae},
2684     {0xbb, 0xf0}, {0xba, 0x32}, {0xb8, 0x74}, {0xb9, 0xb6},
2685     {0xbc, 0xf8}, {0xbd, 0x3a}, {0xbf, 0x7c}, {0xbe, 0xbe} };
2686
2687
2688 static void GMULT(byte *x, byte m[256][AES_BLOCK_SIZE])
2689 {
2690     int i, j;
2691     byte Z[AES_BLOCK_SIZE];
2692     byte a;
2693
2694     XMEMSET(Z, 0, sizeof(Z));
2695
2696     for (i = 15; i > 0; i--) {
2697         xorbuf(Z, m[x[i]], AES_BLOCK_SIZE);
2698         a = Z[15];
2699
2700         for (j = 15; j > 0; j--) {
2701             Z[j] = Z[j-1];
2702         }
2703
2704         Z[0] = R[a][0];
2705         Z[1] ^= R[a][1];
2706     }
2707     xorbuf(Z, m[x[0]], AES_BLOCK_SIZE);
2708
2709     XMEMCPY(x, Z, AES_BLOCK_SIZE);
2710 }
2711
2712
2713 static void GHASH(Aes* aes, const byte* a, word32 aSz,
2714                                 const byte* c, word32 cSz, byte* s, word32 sSz)
2715 {
2716     byte x[AES_BLOCK_SIZE];
2717     byte scratch[AES_BLOCK_SIZE];
2718     word32 blocks, partial;
2719
2720     XMEMSET(x, 0, AES_BLOCK_SIZE);
2721
2722     /* Hash in A, the Additional Authentication Data */
2723     if (aSz != 0 && a != NULL) {
2724         blocks = aSz / AES_BLOCK_SIZE;
2725         partial = aSz % AES_BLOCK_SIZE;
2726         while (blocks--) {
2727             xorbuf(x, a, AES_BLOCK_SIZE);
2728             GMULT(x, aes->M0);
2729             a += AES_BLOCK_SIZE;
2730         }
2731         if (partial != 0) {
2732             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2733             XMEMCPY(scratch, a, partial);
2734             xorbuf(x, scratch, AES_BLOCK_SIZE);
2735             GMULT(x, aes->M0);
2736         }
2737     }
2738
2739     /* Hash in C, the Ciphertext */
2740     if (cSz != 0 && c != NULL) {
2741         blocks = cSz / AES_BLOCK_SIZE;
2742         partial = cSz % AES_BLOCK_SIZE;
2743         while (blocks--) {
2744             xorbuf(x, c, AES_BLOCK_SIZE);
2745             GMULT(x, aes->M0);
2746             c += AES_BLOCK_SIZE;
2747         }
2748         if (partial != 0) {
2749             XMEMSET(scratch, 0, AES_BLOCK_SIZE);
2750             XMEMCPY(scratch, c, partial);
2751             xorbuf(x, scratch, AES_BLOCK_SIZE);
2752             GMULT(x, aes->M0);
2753         }
2754     }
2755
2756     /* Hash in the lengths of A and C in bits */
2757     FlattenSzInBits(&scratch[0], aSz);
2758     FlattenSzInBits(&scratch[8], cSz);
2759     xorbuf(x, scratch, AES_BLOCK_SIZE);
2760     GMULT(x, aes->M0);
2761
2762     /* Copy the result into s. */
2763     XMEMCPY(s, x, sSz);
2764 }
2765
2766 /* end GCM_TABLE */
2767 #elif defined(WORD64_AVAILABLE) && !defined(GCM_WORD32)
2768
2769 static void GMULT(word64* X, word64* Y)
2770 {
2771     word64 Z[2] = {0,0};
2772     word64 V[2] ; 
2773     int i, j;
2774     V[0] = X[0] ;  V[1] = X[1] ;
2775
2776     for (i = 0; i < 2; i++)
2777     {
2778         word64 y = Y[i];
2779         for (j = 0; j < 64; j++)
2780         {
2781             if (y & 0x8000000000000000) {
2782                 Z[0] ^= V[0];
2783                 Z[1] ^= V[1];
2784             }
2785
2786             if (V[1] & 0x0000000000000001) {
2787                 V[1] >>= 1;
2788                 V[1] |= ((V[0] & 0x0000000000000001) ? 0x8000000000000000 : 0);
2789                 V[0] >>= 1;
2790                 V[0] ^= 0xE100000000000000;
2791             }
2792             else {
2793                 V[1] >>= 1;
2794                 V[1] |= ((V[0] & 0x0000000000000001) ? 0x8000000000000000 : 0);
2795                 V[0] >>= 1;
2796             }
2797             y <<= 1;
2798         }
2799     }
2800     X[0] = Z[0];
2801     X[1] = Z[1];
2802 }
2803
2804
2805 static void GHASH(Aes* aes, const byte* a, word32 aSz,
2806                                 const byte* c, word32 cSz, byte* s, word32 sSz)
2807 {
2808     word64 x[2] = {0,0};
2809     word32 blocks, partial;
2810     word64 bigH[2];
2811
2812     XMEMCPY(bigH, aes->H, AES_BLOCK_SIZE);
2813     #ifdef LITTLE_ENDIAN_ORDER
2814         ByteReverseWords64(bigH, bigH, AES_BLOCK_SIZE); 
2815     #endif
2816
2817     /* Hash in A, the Additional Authentication Data */
2818     if (aSz != 0 && a != NULL) {
2819         word64 bigA[2];
2820         blocks = aSz / AES_BLOCK_SIZE;
2821         partial = aSz % AES_BLOCK_SIZE;
2822         while (blocks--) {
2823             XMEMCPY(bigA, a, AES_BLOCK_SIZE);
2824             #ifdef LITTLE_ENDIAN_ORDER
2825                 ByteReverseWords64(bigA, bigA, AES_BLOCK_SIZE);
2826             #endif
2827             x[0] ^= bigA[0];
2828             x[1] ^= bigA[1];
2829             GMULT(x, bigH);
2830             a += AES_BLOCK_SIZE;
2831         }
2832         if (partial != 0) {
2833             XMEMSET(bigA, 0, AES_BLOCK_SIZE);
2834             XMEMCPY(bigA, a, partial);
2835             #ifdef LITTLE_ENDIAN_ORDER
2836                 ByteReverseWords64(bigA, bigA, AES_BLOCK_SIZE);
2837             #endif
2838             x[0] ^= bigA[0];
2839             x[1] ^= bigA[1];
2840             GMULT(x, bigH);
2841         }
2842     }
2843
2844     /* Hash in C, the Ciphertext */
2845     if (cSz != 0 && c != NULL) {
2846         word64 bigC[2];
2847         blocks = cSz / AES_BLOCK_SIZE;
2848         partial = cSz % AES_BLOCK_SIZE;
2849         while (blocks--) {
2850             XMEMCPY(bigC, c, AES_BLOCK_SIZE);
2851             #ifdef LITTLE_ENDIAN_ORDER
2852                 ByteReverseWords64(bigC, bigC, AES_BLOCK_SIZE);
2853             #endif
2854             x[0] ^= bigC[0];
2855             x[1] ^= bigC[1];
2856             GMULT(x, bigH);
2857             c += AES_BLOCK_SIZE;
2858         }
2859         if (partial != 0) {
2860             XMEMSET(bigC, 0, AES_BLOCK_SIZE);
2861             XMEMCPY(bigC, c, partial);
2862             #ifdef LITTLE_ENDIAN_ORDER
2863                 ByteReverseWords64(bigC, bigC, AES_BLOCK_SIZE);
2864             #endif
2865             x[0] ^= bigC[0];
2866             x[1] ^= bigC[1];
2867             GMULT(x, bigH);
2868         }
2869     }
2870
2871     /* Hash in the lengths in bits of A and C */
2872     {
2873         word64 len[2] ; 
2874         len[0] = aSz ; len[1] = cSz;
2875
2876         /* Lengths are in bytes. Convert to bits. */
2877         len[0] *= 8;
2878         len[1] *= 8;
2879
2880         x[0] ^= len[0];
2881         x[1] ^= len[1];
2882         GMULT(x, bigH);
2883     }
2884     #ifdef LITTLE_ENDIAN_ORDER
2885         ByteReverseWords64(x, x, AES_BLOCK_SIZE);
2886     #endif
2887     XMEMCPY(s, x, sSz);
2888 }
2889
2890 /* end defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) */
2891 #else /* GCM_WORD32 */
2892
2893 static void GMULT(word32* X, word32* Y)
2894 {
2895     word32 Z[4] = {0,0,0,0};
2896     word32 V[4] ;
2897     int i, j;
2898
2899     V[0] = X[0];  V[1] = X[1]; V[2] =  X[2]; V[3] =  X[3];
2900
2901     for (i = 0; i < 4; i++)
2902     {
2903         word32 y = Y[i];
2904         for (j = 0; j < 32; j++)
2905         {
2906             if (y & 0x80000000) {
2907                 Z[0] ^= V[0];
2908                 Z[1] ^= V[1];
2909                 Z[2] ^= V[2];
2910                 Z[3] ^= V[3];
2911             }
2912
2913             if (V[3] & 0x00000001) {
2914                 V[3] >>= 1;
2915                 V[3] |= ((V[2] & 0x00000001) ? 0x80000000 : 0);
2916                 V[2] >>= 1;
2917                 V[2] |= ((V[1] & 0x00000001) ? 0x80000000 : 0);
2918                 V[1] >>= 1;
2919                 V[1] |= ((V[0] & 0x00000001) ? 0x80000000 : 0);
2920                 V[0] >>= 1;
2921                 V[0] ^= 0xE1000000;
2922             } else {
2923                 V[3] >>= 1;
2924                 V[3] |= ((V[2] & 0x00000001) ? 0x80000000 : 0);
2925                 V[2] >>= 1;
2926                 V[2] |= ((V[1] & 0x00000001) ? 0x80000000 : 0);
2927                 V[1] >>= 1;
2928                 V[1] |= ((V[0] & 0x00000001) ? 0x80000000 : 0);
2929                 V[0] >>= 1;
2930             }
2931             y <<= 1;
2932         }
2933     }
2934     X[0] = Z[0];
2935     X[1] = Z[1];
2936     X[2] = Z[2];
2937     X[3] = Z[3];
2938 }
2939
2940
2941 static void GHASH(Aes* aes, const byte* a, word32 aSz,
2942                                 const byte* c, word32 cSz, byte* s, word32 sSz)
2943 {
2944     word32 x[4] = {0,0,0,0};
2945     word32 blocks, partial;
2946     word32 bigH[4];
2947
2948     XMEMCPY(bigH, aes->H, AES_BLOCK_SIZE);
2949     #ifdef LITTLE_ENDIAN_ORDER
2950         ByteReverseWords(bigH, bigH, AES_BLOCK_SIZE); 
2951     #endif
2952
2953     /* Hash in A, the Additional Authentication Data */
2954     if (aSz != 0 && a != NULL) {
2955         word32 bigA[4];
2956         blocks = aSz / AES_BLOCK_SIZE;
2957         partial = aSz % AES_BLOCK_SIZE;
2958         while (blocks--) {
2959             XMEMCPY(bigA, a, AES_BLOCK_SIZE);
2960             #ifdef LITTLE_ENDIAN_ORDER
2961                 ByteReverseWords(bigA, bigA, AES_BLOCK_SIZE);
2962             #endif
2963             x[0] ^= bigA[0];
2964             x[1] ^= bigA[1];
2965             x[2] ^= bigA[2];
2966             x[3] ^= bigA[3];
2967             GMULT(x, bigH);
2968             a += AES_BLOCK_SIZE;
2969         }
2970         if (partial != 0) {
2971             XMEMSET(bigA, 0, AES_BLOCK_SIZE);
2972             XMEMCPY(bigA, a, partial);
2973             #ifdef LITTLE_ENDIAN_ORDER
2974                 ByteReverseWords(bigA, bigA, AES_BLOCK_SIZE);
2975             #endif
2976             x[0] ^= bigA[0];
2977             x[1] ^= bigA[1];
2978             x[2] ^= bigA[2];
2979             x[3] ^= bigA[3];
2980             GMULT(x, bigH);
2981         }
2982     }
2983
2984     /* Hash in C, the Ciphertext */
2985     if (cSz != 0 && c != NULL) {
2986         word32 bigC[4];
2987         blocks = cSz / AES_BLOCK_SIZE;
2988         partial = cSz % AES_BLOCK_SIZE;
2989         while (blocks--) {
2990             XMEMCPY(bigC, c, AES_BLOCK_SIZE);
2991             #ifdef LITTLE_ENDIAN_ORDER
2992                 ByteReverseWords(bigC, bigC, AES_BLOCK_SIZE);
2993             #endif
2994             x[0] ^= bigC[0];
2995             x[1] ^= bigC[1];
2996             x[2] ^= bigC[2];
2997             x[3] ^= bigC[3];
2998             GMULT(x, bigH);
2999             c += AES_BLOCK_SIZE;
3000         }
3001         if (partial != 0) {
3002             XMEMSET(bigC, 0, AES_BLOCK_SIZE);
3003             XMEMCPY(bigC, c, partial);
3004             #ifdef LITTLE_ENDIAN_ORDER
3005                 ByteReverseWords(bigC, bigC, AES_BLOCK_SIZE);
3006             #endif
3007             x[0] ^= bigC[0];
3008             x[1] ^= bigC[1];
3009             x[2] ^= bigC[2];
3010             x[3] ^= bigC[3];
3011             GMULT(x, bigH);
3012         }
3013     }
3014
3015     /* Hash in the lengths in bits of A and C */
3016     {
3017         word32 len[4];
3018
3019         /* Lengths are in bytes. Convert to bits. */
3020         len[0] = (aSz >> (8*sizeof(aSz) - 3));
3021         len[1] = aSz << 3;
3022         len[2] = (cSz >> (8*sizeof(cSz) - 3));
3023         len[3] = cSz << 3;
3024
3025         x[0] ^= len[0];
3026         x[1] ^= len[1];
3027         x[2] ^= len[2];
3028         x[3] ^= len[3];
3029         GMULT(x, bigH);
3030     }
3031     #ifdef LITTLE_ENDIAN_ORDER
3032         ByteReverseWords(x, x, AES_BLOCK_SIZE);
3033     #endif
3034     XMEMCPY(s, x, sSz);
3035 }
3036
3037 #endif /* end GCM_WORD32 */
3038
3039
3040 void AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz,
3041                    const byte* iv, word32 ivSz,
3042                    byte* authTag, word32 authTagSz,
3043                    const byte* authIn, word32 authInSz)
3044 {
3045     word32 blocks = sz / AES_BLOCK_SIZE;
3046     word32 partial = sz % AES_BLOCK_SIZE;
3047     const byte* p = in;
3048     byte* c = out;
3049     byte counter[AES_BLOCK_SIZE];
3050     byte *ctr ;
3051     byte scratch[AES_BLOCK_SIZE];
3052
3053     CYASSL_ENTER("AesGcmEncrypt");
3054
3055 #ifdef CYASSL_PIC32MZ_CRYPT
3056     ctr = (char *)aes->iv_ce ;
3057 #else
3058     ctr = counter ;
3059 #endif
3060
3061     XMEMSET(ctr, 0, AES_BLOCK_SIZE);
3062     XMEMCPY(ctr, iv, ivSz);
3063     InitGcmCounter(ctr);
3064
3065 #ifdef CYASSL_PIC32MZ_CRYPT
3066     if(blocks)
3067         AesCrypt(aes, out, in, blocks*AES_BLOCK_SIZE,
3068              PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM );
3069 #endif
3070     while (blocks--) {
3071         IncrementGcmCounter(ctr);
3072         #ifndef CYASSL_PIC32MZ_CRYPT
3073         AesEncrypt(aes, ctr, scratch);
3074         xorbuf(scratch, p, AES_BLOCK_SIZE);
3075         XMEMCPY(c, scratch, AES_BLOCK_SIZE);
3076         #endif
3077         p += AES_BLOCK_SIZE;
3078         c += AES_BLOCK_SIZE;
3079     }
3080
3081     if (partial != 0) {
3082         IncrementGcmCounter(ctr);
3083         AesEncrypt(aes, ctr, scratch);
3084         xorbuf(scratch, p, partial);
3085         XMEMCPY(c, scratch, partial);
3086
3087     }
3088
3089     GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
3090     InitGcmCounter(ctr);
3091     AesEncrypt(aes, ctr, scratch);
3092     xorbuf(authTag, scratch, authTagSz);
3093
3094 }
3095
3096
3097 int  AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz,
3098                    const byte* iv, word32 ivSz,
3099                    const byte* authTag, word32 authTagSz,
3100                    const byte* authIn, word32 authInSz)
3101 {
3102     word32 blocks = sz / AES_BLOCK_SIZE;
3103     word32 partial = sz % AES_BLOCK_SIZE;
3104     const byte* c = in;
3105     byte* p = out;
3106     byte counter[AES_BLOCK_SIZE];
3107     byte *ctr ;
3108     byte scratch[AES_BLOCK_SIZE];
3109
3110     CYASSL_ENTER("AesGcmDecrypt");
3111
3112 #ifdef CYASSL_PIC32MZ_CRYPT
3113     ctr = (char *)aes->iv_ce ;
3114 #else
3115     ctr = counter ;
3116 #endif
3117
3118     XMEMSET(ctr, 0, AES_BLOCK_SIZE);
3119     XMEMCPY(ctr, iv, ivSz);
3120     InitGcmCounter(ctr);
3121
3122     /* Calculate the authTag again using the received auth data and the
3123      * cipher text. */
3124     {
3125         byte Tprime[AES_BLOCK_SIZE];
3126         byte EKY0[AES_BLOCK_SIZE];
3127
3128         GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime));
3129         AesEncrypt(aes, ctr, EKY0);
3130         xorbuf(Tprime, EKY0, sizeof(Tprime));
3131
3132         if (XMEMCMP(authTag, Tprime, authTagSz) != 0) {
3133             return AES_GCM_AUTH_E;
3134         }
3135     }
3136  
3137 #ifdef CYASSL_PIC32MZ_CRYPT
3138     if(blocks)
3139         AesCrypt(aes, out, in, blocks*AES_BLOCK_SIZE,
3140              PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM );
3141 #endif
3142
3143     while (blocks--) {
3144         IncrementGcmCounter(ctr);
3145         #ifndef CYASSL_PIC32MZ_CRYPT
3146         AesEncrypt(aes, ctr, scratch);
3147         xorbuf(scratch, c, AES_BLOCK_SIZE);
3148         XMEMCPY(p, scratch, AES_BLOCK_SIZE);
3149         #endif
3150         p += AES_BLOCK_SIZE;
3151         c += AES_BLOCK_SIZE;
3152     }
3153     if (partial != 0) {
3154         IncrementGcmCounter(ctr);
3155         AesEncrypt(aes, ctr, scratch);
3156         xorbuf(scratch, c, partial);
3157         XMEMCPY(p, scratch, partial);
3158     }
3159     return 0;
3160 }
3161
3162
3163
3164 CYASSL_API void GmacSetKey(Gmac* gmac, const byte* key, word32 len)
3165 {
3166     AesGcmSetKey(&gmac->aes, key, len);
3167 }
3168
3169
3170 CYASSL_API void GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz,
3171                               const byte* authIn, word32 authInSz,
3172                               byte* authTag, word32 authTagSz)
3173 {
3174     AesGcmEncrypt(&gmac->aes, NULL, NULL, 0, iv, ivSz,
3175                                          authTag, authTagSz, authIn, authInSz);
3176 }
3177
3178 #endif /* HAVE_AESGCM */
3179
3180 #ifdef HAVE_AESCCM
3181
3182 void AesCcmSetKey(Aes* aes, const byte* key, word32 keySz)
3183 {
3184     byte nonce[AES_BLOCK_SIZE];
3185
3186     if (!((keySz == 16) || (keySz == 24) || (keySz == 32)))
3187         return;
3188
3189     XMEMSET(nonce, 0, sizeof(nonce));
3190     AesSetKey(aes, key, keySz, nonce, AES_ENCRYPTION);
3191 }
3192
3193
3194 static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out)
3195 {
3196     /* process the bulk of the data */
3197     while (inSz >= AES_BLOCK_SIZE) {
3198         xorbuf(out, in, AES_BLOCK_SIZE);
3199         in += AES_BLOCK_SIZE;
3200         inSz -= AES_BLOCK_SIZE;
3201
3202         AesEncrypt(aes, out, out);
3203     }
3204
3205     /* process remainder of the data */
3206     if (inSz > 0) {
3207         xorbuf(out, in, inSz);
3208         AesEncrypt(aes, out, out);
3209     }
3210 }
3211
3212
3213 static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out)
3214 {
3215     word32 authLenSz;
3216     word32 remainder;
3217
3218     /* encode the length in */
3219     if (inSz <= 0xFEFF) {
3220         authLenSz = 2;
3221         out[0] ^= ((inSz & 0xFF00) >> 8);
3222         out[1] ^=  (inSz & 0x00FF);
3223     }
3224     else if (inSz <= 0xFFFFFFFF) {
3225         authLenSz = 6;
3226         out[0] ^= 0xFF; out[1] ^= 0xFE;
3227         out[2] ^= ((inSz & 0xFF000000) >> 24);
3228         out[3] ^= ((inSz & 0x00FF0000) >> 16);
3229         out[4] ^= ((inSz & 0x0000FF00) >>  8);
3230         out[5] ^=  (inSz & 0x000000FF);
3231     }
3232     /* Note, the protocol handles auth data up to 2^64, but we are
3233      * using 32-bit sizes right now, so the bigger data isn't handled
3234      * else if (inSz <= 0xFFFFFFFFFFFFFFFF) {} */
3235     else
3236         return;
3237
3238     /* start fill out the rest of the first block */
3239     remainder = AES_BLOCK_SIZE - authLenSz;
3240     if (inSz >= remainder) {
3241         /* plenty of bulk data to fill the remainder of this block */
3242         xorbuf(out + authLenSz, in, remainder);
3243         inSz -= remainder;
3244         in += remainder;
3245     }
3246     else {
3247         /* not enough bulk data, copy what is available, and pad zero */
3248         xorbuf(out + authLenSz, in, inSz);
3249         inSz = 0;
3250     }
3251     AesEncrypt(aes, out, out);
3252
3253     if (inSz > 0)
3254         roll_x(aes, in, inSz, out);
3255 }
3256
3257
3258 static INLINE void AesCcmCtrInc(byte* B, word32 lenSz)
3259 {
3260     word32 i;
3261
3262     for (i = 0; i < lenSz; i++) {
3263         if (++B[AES_BLOCK_SIZE - 1 - i] != 0) return;
3264     }
3265 }
3266
3267
3268 void AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
3269                    const byte* nonce, word32 nonceSz,
3270                    byte* authTag, word32 authTagSz,
3271                    const byte* authIn, word32 authInSz)
3272 {
3273     byte A[AES_BLOCK_SIZE];
3274     byte B[AES_BLOCK_SIZE];
3275     byte lenSz;
3276     word32 i;
3277
3278     XMEMCPY(B+1, nonce, nonceSz);
3279     lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz;
3280     B[0] = (authInSz > 0 ? 64 : 0)
3281          + (8 * (((byte)authTagSz - 2) / 2))
3282          + (lenSz - 1);
3283     for (i = 0; i < lenSz; i++)
3284         B[AES_BLOCK_SIZE - 1 - i] = (inSz >> (8 * i)) & 0xFF;
3285
3286     AesEncrypt(aes, B, A);
3287     if (authInSz > 0)
3288         roll_auth(aes, authIn, authInSz, A);
3289     if (inSz > 0)
3290         roll_x(aes, in, inSz, A);
3291     XMEMCPY(authTag, A, authTagSz);
3292
3293     B[0] = lenSz - 1;
3294     for (i = 0; i < lenSz; i++)
3295         B[AES_BLOCK_SIZE - 1 - i] = 0;
3296     AesEncrypt(aes, B, A);
3297     xorbuf(authTag, A, authTagSz);
3298
3299     B[15] = 1;
3300     while (inSz >= AES_BLOCK_SIZE) {
3301         AesEncrypt(aes, B, A);
3302         xorbuf(A, in, AES_BLOCK_SIZE);
3303         XMEMCPY(out, A, AES_BLOCK_SIZE);
3304
3305         AesCcmCtrInc(B, lenSz);
3306         inSz -= AES_BLOCK_SIZE;
3307         in += AES_BLOCK_SIZE;
3308         out += AES_BLOCK_SIZE;
3309     }
3310     if (inSz > 0) {
3311         AesEncrypt(aes, B, A);
3312         xorbuf(A, in, inSz);
3313         XMEMCPY(out, A, inSz);
3314     }
3315
3316     XMEMSET(A, 0, AES_BLOCK_SIZE);
3317     XMEMSET(B, 0, AES_BLOCK_SIZE);
3318 }
3319
3320
3321 int  AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz,
3322                    const byte* nonce, word32 nonceSz,
3323                    const byte* authTag, word32 authTagSz,
3324                    const byte* authIn, word32 authInSz)
3325 {
3326     byte A[AES_BLOCK_SIZE];
3327     byte B[AES_BLOCK_SIZE];
3328     byte* o;
3329     byte lenSz;
3330     word32 i, oSz;
3331     int result = 0;
3332
3333     o = out;
3334     oSz = inSz;
3335     XMEMCPY(B+1, nonce, nonceSz);
3336     lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz;
3337
3338     B[0] = lenSz - 1;
3339     for (i = 0; i < lenSz; i++)
3340         B[AES_BLOCK_SIZE - 1 - i] = 0;
3341     B[15] = 1;
3342     
3343     while (oSz >= AES_BLOCK_SIZE) {
3344         AesEncrypt(aes, B, A);
3345         xorbuf(A, in, AES_BLOCK_SIZE);
3346         XMEMCPY(o, A, AES_BLOCK_SIZE);
3347
3348         AesCcmCtrInc(B, lenSz);
3349         oSz -= AES_BLOCK_SIZE;
3350         in += AES_BLOCK_SIZE;
3351         o += AES_BLOCK_SIZE;
3352     }
3353     if (inSz > 0) {
3354         AesEncrypt(aes, B, A);
3355         xorbuf(A, in, oSz);
3356         XMEMCPY(o, A, oSz);
3357     }
3358
3359     for (i = 0; i < lenSz; i++)
3360         B[AES_BLOCK_SIZE - 1 - i] = 0;
3361     AesEncrypt(aes, B, A);
3362
3363     o = out;
3364     oSz = inSz;
3365
3366     B[0] = (authInSz > 0 ? 64 : 0)
3367          + (8 * (((byte)authTagSz - 2) / 2))
3368          + (lenSz - 1);
3369     for (i = 0; i < lenSz; i++)
3370         B[AES_BLOCK_SIZE - 1 - i] = (inSz >> (8 * i)) & 0xFF;
3371
3372     AesEncrypt(aes, B, A);
3373     if (authInSz > 0)
3374         roll_auth(aes, authIn, authInSz, A);
3375     if (inSz > 0)
3376         roll_x(aes, o, oSz, A);
3377
3378     B[0] = lenSz - 1;
3379     for (i = 0; i < lenSz; i++)
3380         B[AES_BLOCK_SIZE - 1 - i] = 0;
3381     AesEncrypt(aes, B, B);
3382     xorbuf(A, B, authTagSz);
3383
3384     if (XMEMCMP(A, authTag, authTagSz) != 0) {
3385         /* If the authTag check fails, don't keep the decrypted data.
3386          * Unfortunately, you need the decrypted data to calculate the
3387          * check value. */
3388         XMEMSET(out, 0, inSz);
3389         result = AES_CCM_AUTH_E;
3390     }
3391
3392     XMEMSET(A, 0, AES_BLOCK_SIZE);
3393     XMEMSET(B, 0, AES_BLOCK_SIZE);
3394     o = NULL;
3395
3396     return result;
3397 }
3398
3399 #endif
3400
3401 #endif /* STM32F2_CRYPTO */
3402
3403 int AesSetIV(Aes* aes, const byte* iv)
3404 {
3405     if (aes == NULL)
3406         return BAD_FUNC_ARG;
3407
3408     if (iv)
3409         XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE);
3410     else
3411         XMEMSET(aes->reg,  0, AES_BLOCK_SIZE);
3412
3413     return 0;
3414 }
3415
3416
3417 #ifdef HAVE_CAVIUM
3418
3419 #include <cyassl/ctaocrypt/logging.h>
3420 #include "cavium_common.h"
3421
3422 /* Initiliaze Aes for use with Nitrox device */
3423 int AesInitCavium(Aes* aes, int devId)
3424 {
3425     if (aes == NULL)
3426         return -1;
3427
3428     if (CspAllocContext(CONTEXT_SSL, &aes->contextHandle, devId) != 0)
3429         return -1;
3430
3431     aes->devId = devId;
3432     aes->magic = CYASSL_AES_CAVIUM_MAGIC;
3433    
3434     return 0;
3435 }
3436
3437
3438 /* Free Aes from use with Nitrox device */
3439 void AesFreeCavium(Aes* aes)
3440 {
3441     if (aes == NULL)
3442         return;
3443
3444     if (aes->magic != CYASSL_AES_CAVIUM_MAGIC)
3445         return;
3446
3447     CspFreeContext(CONTEXT_SSL, aes->contextHandle, aes->devId);
3448     aes->magic = 0;
3449 }
3450
3451
3452 static int AesCaviumSetKey(Aes* aes, const byte* key, word32 length,
3453                            const byte* iv)
3454 {
3455     if (aes == NULL)
3456         return -1;
3457
3458     XMEMCPY(aes->key, key, length);   /* key still holds key, iv still in reg */
3459     if (length == 16)
3460         aes->type = AES_128;
3461     else if (length == 24)
3462         aes->type = AES_192;
3463     else if (length == 32)
3464         aes->type = AES_256;
3465
3466     return AesSetIV(aes, iv);
3467 }
3468
3469
3470 static int AesCaviumCbcEncrypt(Aes* aes, byte* out, const byte* in,
3471                                word32 length)
3472 {
3473     word   offset = 0;
3474     word32 requestId;
3475
3476     while (length > CYASSL_MAX_16BIT) {
3477         word16 slen = (word16)CYASSL_MAX_16BIT;
3478         if (CspEncryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE,
3479                           aes->type, slen, (byte*)in + offset, out + offset,
3480                           (byte*)aes->reg, (byte*)aes->key, &requestId,
3481                           aes->devId) != 0) {
3482             CYASSL_MSG("Bad Cavium Aes Encrypt");
3483             return -1;
3484         }
3485         length -= CYASSL_MAX_16BIT;
3486         offset += CYASSL_MAX_16BIT;
3487         XMEMCPY(aes->reg, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
3488     }
3489     if (length) {
3490         word16 slen = (word16)length;
3491         if (CspEncryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE,
3492                           aes->type, slen, (byte*)in + offset, out + offset,
3493                           (byte*)aes->reg, (byte*)aes->key, &requestId,
3494                           aes->devId) != 0) {
3495             CYASSL_MSG("Bad Cavium Aes Encrypt");
3496             return -1;
3497         }
3498         XMEMCPY(aes->reg, out + offset+length - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
3499     }
3500     return 0;
3501 }
3502
3503 static int AesCaviumCbcDecrypt(Aes* aes, byte* out, const byte* in,
3504                                word32 length)
3505 {
3506     word32 requestId;
3507     word   offset = 0;
3508
3509     while (length > CYASSL_MAX_16BIT) {
3510         word16 slen = (word16)CYASSL_MAX_16BIT;
3511         XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
3512         if (CspDecryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE,
3513                           aes->type, slen, (byte*)in + offset, out + offset,
3514                           (byte*)aes->reg, (byte*)aes->key, &requestId,
3515                           aes->devId) != 0) {
3516             CYASSL_MSG("Bad Cavium Aes Decrypt");
3517             return -1;
3518         }
3519         length -= CYASSL_MAX_16BIT;
3520         offset += CYASSL_MAX_16BIT;
3521         XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
3522     }
3523     if (length) {
3524         word16 slen = (word16)length;
3525         XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE);
3526         if (CspDecryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE,
3527                           aes->type, slen, (byte*)in + offset, out + offset,
3528                           (byte*)aes->reg, (byte*)aes->key, &requestId,
3529                           aes->devId) != 0) {
3530             CYASSL_MSG("Bad Cavium Aes Decrypt");
3531             return -1;
3532         }
3533         XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE);
3534     }
3535     return 0;
3536 }
3537
3538 #endif /* HAVE_CAVIUM */
3539
3540 #endif /* NO_AES */
3541
3542