3 ; *Copyright (C) 2006-2014 wolfSSL Inc.
\r
5 ; *This file is part of CyaSSL .
\r
7 ; *CyaSSL is free software; you can redistribute it and/or modify
\r
8 ; *it under the terms of the GNU General Public License as published by
\r
9 ; *the Free Software Foundation; either version 2 of the License, or
\r
10 ; *(at your option) any later version.
\r
12 ; *CyaSSL is distributed in the hope that it will be useful,
\r
13 ; *but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
14 ; *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
\r
15 ; *GNU General Public License for more details .
\r
17 ; *You should have received a copy of the GNU General Public License
\r
18 ; *along with this program; if not, write to the Free Software
\r
19 ; *Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
23 ; /*See Intel Advanced Encryption Standard (AES) Instructions Set White Paper
\r
24 ; *by Shay Gueron, Intel Mobility Group Development Center, Israel
\r
27 ; /* This file is in intel asm syntax, see .s for at&t syntax */
\r
30 ; AES_CBC_encrypt(const unsigned char *in,
\r
31 ; unsigned char *out,
\r
32 ; unsigned char ivec[16],
\r
33 ; unsigned long length,
\r
34 ; const unsigned char *KS,
\r
38 AES_CBC_encrypt PROC
\r
46 ; save rdi and rsi to rax and r11, restore before ret
\r
50 ; convert to what we had for att&t convention
\r
94 aesenclast xmm1,xmm2
\r
97 ; restore non volatile rdi,rsi
\r
101 AES_CBC_encrypt ENDP
\r
106 ; AES_CBC_decrypt(const unsigned char *in,
\r
107 ; unsigned char *out,
\r
108 ; unsigned char ivec[16],
\r
109 ; unsigned long length,
\r
110 ; const unsigned char *KS,
\r
113 ; . globl AES_CBC_decrypt
\r
114 AES_CBC_decrypt PROC
\r
115 ;# parameter 1: rdi
\r
116 ;# parameter 2: rsi
\r
117 ;# parameter 3: rdx
\r
118 ;# parameter 4: rcx
\r
120 ;# parameter 6: r9d
\r
122 ; save rdi and rsi to rax and r11, restore before ret
\r
126 ; convert to what we had for att&t convention
\r
134 ; on microsoft xmm6-xmm15 are non volaitle, let's save on stack and restore at end
\r
135 sub rsp,8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each
\r
136 movdqa [rsp+0], xmm6
\r
137 movdqa [rsp+16], xmm7
\r
138 movdqa [rsp+32], xmm8
\r
139 movdqa [rsp+48], xmm9
\r
140 movdqa [rsp+64], xmm10
\r
141 movdqa [rsp+80], xmm11
\r
142 movdqa [rsp+96], xmm12
\r
143 movdqa [rsp+112], xmm15
\r
160 movdqu xmm2,16[rdi]
\r
161 movdqu xmm3,32[rdi]
\r
162 movdqu xmm4,48[rdi]
\r
168 movdqa xmm10,16[r8]
\r
169 movdqa xmm11,32[r8]
\r
170 movdqa xmm12,48[r8]
\r
189 movdqa xmm10,80[r8]
\r
190 movdqa xmm11,96[r8]
\r
191 movdqa xmm12,112[r8]
\r
208 movdqa xmm9,128[r8]
\r
209 movdqa xmm10,144[r8]
\r
210 movdqa xmm11,160[r8]
\r
221 movdqa xmm9,160[r8]
\r
222 movdqa xmm10,176[r8]
\r
223 movdqa xmm11,192[r8]
\r
235 movdqa xmm9,192[r8]
\r
236 movdqa xmm10,208[r8]
\r
237 movdqa xmm11,224[r8]
\r
250 aesdeclast xmm1,xmm11
\r
251 aesdeclast xmm2,xmm11
\r
252 aesdeclast xmm3,xmm11
\r
253 aesdeclast xmm4,xmm11
\r
259 movdqu 16[rsi],xmm2
\r
260 movdqu 32[rsi],xmm3
\r
261 movdqu 48[rsi],xmm4
\r
273 movdqu xmm2,160[r8]
\r
281 aesdec xmm1,112[r8]
\r
282 aesdec xmm1,128[r8]
\r
283 aesdec xmm1,144[r8]
\r
285 movdqu xmm2,192[r8]
\r
287 aesdec xmm1,160[r8]
\r
288 aesdec xmm1,176[r8]
\r
290 movdqu xmm2,224[r8]
\r
291 aesdec xmm1,192[r8]
\r
292 aesdec xmm1,208[r8]
\r
294 aesdeclast xmm1,xmm2
\r
303 ; restore non volatile rdi,rsi
\r
306 ; restore non volatile xmms from stack
\r
307 movdqa xmm6, [rsp+0]
\r
308 movdqa xmm7, [rsp+16]
\r
309 movdqa xmm8, [rsp+32]
\r
310 movdqa xmm9, [rsp+48]
\r
311 movdqa xmm10, [rsp+64]
\r
312 movdqa xmm11, [rsp+80]
\r
313 movdqa xmm12, [rsp+96]
\r
314 movdqa xmm15, [rsp+112]
\r
315 add rsp,8+8*16 ; 8 = align stack , 8 xmm6-12,15 16 bytes each
\r
317 AES_CBC_decrypt ENDP
\r
320 ; AES_ECB_encrypt(const unsigned char *in,
\r
321 ; unsigned char *out,
\r
322 ; unsigned long length,
\r
323 ; const unsigned char *KS,
\r
326 ; . globl AES_ECB_encrypt
\r
327 AES_ECB_encrypt PROC
\r
328 ;# parameter 1: rdi
\r
329 ;# parameter 2: rsi
\r
330 ;# parameter 3: rdx
\r
331 ;# parameter 4: rcx
\r
332 ;# parameter 5: r8d
\r
334 ; save rdi and rsi to rax and r11, restore before ret
\r
338 ; convert to what we had for att&t convention
\r
345 ; on microsoft xmm6-xmm15 are non volaitle, let's save on stack and restore at end
\r
346 sub rsp,8+4*16 ; 8 = align stack , 4 xmm9-12, 16 bytes each
\r
347 movdqa [rsp+0], xmm9
\r
348 movdqa [rsp+16], xmm10
\r
349 movdqa [rsp+32], xmm11
\r
350 movdqa [rsp+48], xmm12
\r
363 je EECB_REMAINDER_4
\r
367 movdqu xmm2,16[rdi]
\r
368 movdqu xmm3,32[rdi]
\r
369 movdqu xmm4,48[rdi]
\r
371 movdqa xmm10,16[rcx]
\r
372 movdqa xmm11,32[rcx]
\r
373 movdqa xmm12,48[rcx]
\r
390 movdqa xmm9,64[rcx]
\r
391 movdqa xmm10,80[rcx]
\r
392 movdqa xmm11,96[rcx]
\r
393 movdqa xmm12,112[rcx]
\r
410 movdqa xmm9,128[rcx]
\r
411 movdqa xmm10,144[rcx]
\r
412 movdqa xmm11,160[rcx]
\r
423 movdqa xmm9,160[rcx]
\r
424 movdqa xmm10,176[rcx]
\r
425 movdqa xmm11,192[rcx]
\r
436 movdqa xmm9,192[rcx]
\r
437 movdqa xmm10,208[rcx]
\r
438 movdqa xmm11,224[rcx]
\r
451 aesenclast xmm1,xmm11
\r
452 aesenclast xmm2,xmm11
\r
453 aesenclast xmm3,xmm11
\r
454 aesenclast xmm4,xmm11
\r
456 movdqu 16[rsi],xmm2
\r
457 movdqu 32[rsi],xmm3
\r
458 movdqu 48[rsi],xmm4
\r
468 movdqu xmm2,160[rcx]
\r
469 aesenc xmm1,16[rcx]
\r
470 aesenc xmm1,32[rcx]
\r
471 aesenc xmm1,48[rcx]
\r
472 aesenc xmm1,64[rcx]
\r
473 aesenc xmm1,80[rcx]
\r
474 aesenc xmm1,96[rcx]
\r
475 aesenc xmm1,112[rcx]
\r
476 aesenc xmm1,128[rcx]
\r
477 aesenc xmm1,144[rcx]
\r
480 movdqu xmm2,192[rcx]
\r
481 aesenc xmm1,160[rcx]
\r
482 aesenc xmm1,176[rcx]
\r
485 movdqu xmm2,224[rcx]
\r
486 aesenc xmm1,192[rcx]
\r
487 aesenc xmm1,208[rcx]
\r
489 aesenclast xmm1,xmm2
\r
495 ; restore non volatile rdi,rsi
\r
498 ; restore non volatile xmms from stack
\r
499 movdqa xmm9, [rsp+0]
\r
500 movdqa xmm10, [rsp+16]
\r
501 movdqa xmm11, [rsp+32]
\r
502 movdqa xmm12, [rsp+48]
\r
503 add rsp,8+4*16 ; 8 = align stack , 4 xmm9-12 16 bytes each
\r
505 AES_ECB_encrypt ENDP
\r
508 ; AES_ECB_decrypt(const unsigned char *in,
\r
509 ; unsigned char *out,
\r
510 ; unsigned long length,
\r
511 ; const unsigned char *KS,
\r
514 ; . globl AES_ECB_decrypt
\r
515 AES_ECB_decrypt PROC
\r
516 ;# parameter 1: rdi
\r
517 ;# parameter 2: rsi
\r
518 ;# parameter 3: rdx
\r
519 ;# parameter 4: rcx
\r
520 ;# parameter 5: r8d
\r
522 ; save rdi and rsi to rax and r11, restore before ret
\r
526 ; convert to what we had for att&t convention
\r
533 ; on microsoft xmm6-xmm15 are non volaitle, let's save on stack and restore at end
\r
534 sub rsp,8+4*16 ; 8 = align stack , 4 xmm9-12, 16 bytes each
\r
535 movdqa [rsp+0], xmm9
\r
536 movdqa [rsp+16], xmm10
\r
537 movdqa [rsp+32], xmm11
\r
538 movdqa [rsp+48], xmm12
\r
550 je DECB_REMAINDER_4
\r
554 movdqu xmm2,16[rdi]
\r
555 movdqu xmm3,32[rdi]
\r
556 movdqu xmm4,48[rdi]
\r
558 movdqa xmm10,16[rcx]
\r
559 movdqa xmm11,32[rcx]
\r
560 movdqa xmm12,48[rcx]
\r
577 movdqa xmm9,64[rcx]
\r
578 movdqa xmm10,80[rcx]
\r
579 movdqa xmm11,96[rcx]
\r
580 movdqa xmm12,112[rcx]
\r
597 movdqa xmm9,128[rcx]
\r
598 movdqa xmm10,144[rcx]
\r
599 movdqa xmm11,160[rcx]
\r
610 movdqa xmm9,160[rcx]
\r
611 movdqa xmm10,176[rcx]
\r
612 movdqa xmm11,192[rcx]
\r
623 movdqa xmm9,192[rcx]
\r
624 movdqa xmm10,208[rcx]
\r
625 movdqa xmm11,224[rcx]
\r
638 aesdeclast xmm1,xmm11
\r
639 aesdeclast xmm2,xmm11
\r
640 aesdeclast xmm3,xmm11
\r
641 aesdeclast xmm4,xmm11
\r
643 movdqu 16[rsi],xmm2
\r
644 movdqu 32[rsi],xmm3
\r
645 movdqu 48[rsi],xmm4
\r
655 movdqu xmm2,160[rcx]
\r
657 aesdec xmm1,16[rcx]
\r
658 aesdec xmm1,32[rcx]
\r
659 aesdec xmm1,48[rcx]
\r
660 aesdec xmm1,64[rcx]
\r
661 aesdec xmm1,80[rcx]
\r
662 aesdec xmm1,96[rcx]
\r
663 aesdec xmm1,112[rcx]
\r
664 aesdec xmm1,128[rcx]
\r
665 aesdec xmm1,144[rcx]
\r
668 movdqu xmm2,192[rcx]
\r
669 aesdec xmm1,160[rcx]
\r
670 aesdec xmm1,176[rcx]
\r
672 movdqu xmm2,224[rcx]
\r
673 aesdec xmm1,192[rcx]
\r
674 aesdec xmm1,208[rcx]
\r
676 aesdeclast xmm1,xmm2
\r
682 ; restore non volatile rdi,rsi
\r
685 ; restore non volatile xmms from stack
\r
686 movdqa xmm9, [rsp+0]
\r
687 movdqa xmm10, [rsp+16]
\r
688 movdqa xmm11, [rsp+32]
\r
689 movdqa xmm12, [rsp+48]
\r
690 add rsp,8+4*16 ; 8 = align stack , 4 xmm9-12 16 bytes each
\r
692 AES_ECB_decrypt ENDP
\r
697 ; void AES_128_Key_Expansion(const unsigned char *userkey,
\r
698 ; unsigned char *key_schedule);
\r
701 ; . globl AES_128_Key_Expansion
\r
702 AES_128_Key_Expansion PROC
\r
703 ;# parameter 1: rdi
\r
704 ;# parameter 2: rsi
\r
706 ; save rdi and rsi to rax and r11, restore before ret
\r
710 ; convert to what we had for att&t convention
\r
714 mov dword ptr 240[rsi],10
\r
721 aeskeygenassist xmm2,xmm1,1
\r
722 call PREPARE_ROUNDKEY_128
\r
723 movdqa 16[rsi],xmm1
\r
725 aeskeygenassist xmm2,xmm1,2
\r
726 call PREPARE_ROUNDKEY_128
\r
727 movdqa 32[rsi],xmm1
\r
729 aeskeygenassist xmm2,xmm1,4
\r
730 call PREPARE_ROUNDKEY_128
\r
731 movdqa 48[rsi],xmm1
\r
733 aeskeygenassist xmm2,xmm1,8
\r
734 call PREPARE_ROUNDKEY_128
\r
735 movdqa 64[rsi],xmm1
\r
737 aeskeygenassist xmm2,xmm1,16
\r
738 call PREPARE_ROUNDKEY_128
\r
739 movdqa 80[rsi],xmm1
\r
741 aeskeygenassist xmm2,xmm1,32
\r
742 call PREPARE_ROUNDKEY_128
\r
743 movdqa 96[rsi],xmm1
\r
745 aeskeygenassist xmm2,xmm1,64
\r
746 call PREPARE_ROUNDKEY_128
\r
747 movdqa 112[rsi],xmm1
\r
748 aeskeygenassist xmm2,xmm1,80h
\r
749 call PREPARE_ROUNDKEY_128
\r
750 movdqa 128[rsi],xmm1
\r
751 aeskeygenassist xmm2,xmm1,1bh
\r
752 call PREPARE_ROUNDKEY_128
\r
753 movdqa 144[rsi],xmm1
\r
754 aeskeygenassist xmm2,xmm1,36h
\r
755 call PREPARE_ROUNDKEY_128
\r
756 movdqa 160[rsi],xmm1
\r
757 ; restore non volatile rdi,rsi
\r
762 PREPARE_ROUNDKEY_128:
\r
763 pshufd xmm2,xmm2,255
\r
773 AES_128_Key_Expansion ENDP
\r
776 ; void AES_192_Key_Expansion(const unsigned char *userkey,
\r
777 ; unsigned char *key)
\r
779 ; . globl AES_192_Key_Expansion
\r
780 AES_192_Key_Expansion PROC
\r
781 ;# parameter 1: rdi
\r
782 ;# parameter 2: rsi
\r
784 ; save rdi and rsi to rax and r11, restore before ret
\r
788 ; convert to what we had for att&t convention
\r
792 ; on microsoft xmm6-xmm15 are non volaitle, let's save on stack and restore at end
\r
793 sub rsp,8+1*16 ; 8 = align stack , 1 xmm6, 16 bytes each
\r
794 movdqa [rsp+0], xmm6
\r
797 movdqu xmm3,16[rdi]
\r
801 aeskeygenassist xmm2,xmm3,1h
\r
802 call PREPARE_ROUNDKEY_192
\r
804 movdqa 16[rsi],xmm5
\r
807 movdqa 32[rsi],xmm6
\r
809 aeskeygenassist xmm2,xmm3,2h
\r
810 call PREPARE_ROUNDKEY_192
\r
811 movdqa 48[rsi],xmm1
\r
814 aeskeygenassist xmm2,xmm3,4h
\r
815 call PREPARE_ROUNDKEY_192
\r
817 movdqa 64[rsi],xmm5
\r
820 movdqa 80[rsi],xmm6
\r
822 aeskeygenassist xmm2,xmm3,8h
\r
823 call PREPARE_ROUNDKEY_192
\r
824 movdqa 96[rsi],xmm1
\r
827 aeskeygenassist xmm2,xmm3,10h
\r
828 call PREPARE_ROUNDKEY_192
\r
830 movdqa 112[rsi],xmm5
\r
833 movdqa 128[rsi],xmm6
\r
835 aeskeygenassist xmm2,xmm3,20h
\r
836 call PREPARE_ROUNDKEY_192
\r
837 movdqa 144[rsi],xmm1
\r
840 aeskeygenassist xmm2,xmm3,40h
\r
841 call PREPARE_ROUNDKEY_192
\r
843 movdqa 160[rsi],xmm5
\r
846 movdqa 176[rsi],xmm6
\r
848 aeskeygenassist xmm2,xmm3,80h
\r
849 call PREPARE_ROUNDKEY_192
\r
850 movdqa 192[rsi],xmm1
\r
851 movdqa 208[rsi],xmm3
\r
852 ; restore non volatile rdi,rsi
\r
855 ; restore non volatile xmms from stack
\r
856 movdqa xmm6, [rsp+0]
\r
857 add rsp,8+1*16 ; 8 = align stack , 1 xmm6 16 bytes each
\r
860 PREPARE_ROUNDKEY_192:
\r
861 pshufd xmm2,xmm2,55h
\r
871 pshufd xmm2,xmm1,0ffh
\r
877 AES_192_Key_Expansion ENDP
\r
880 ; void AES_256_Key_Expansion(const unsigned char *userkey,
\r
881 ; unsigned char *key)
\r
883 ; . globl AES_256_Key_Expansion
\r
884 AES_256_Key_Expansion PROC
\r
885 ;# parameter 1: rdi
\r
886 ;# parameter 2: rsi
\r
888 ; save rdi and rsi to rax and r11, restore before ret
\r
892 ; convert to what we had for att&t convention
\r
897 movdqu xmm3,16[rdi]
\r
899 movdqa 16[rsi],xmm3
\r
901 aeskeygenassist xmm2,xmm3,1h
\r
903 movdqa 32[rsi],xmm1
\r
904 aeskeygenassist xmm2,xmm1,0h
\r
906 movdqa 48[rsi],xmm3
\r
907 aeskeygenassist xmm2,xmm3,2h
\r
909 movdqa 64[rsi],xmm1
\r
910 aeskeygenassist xmm2,xmm1,0h
\r
912 movdqa 80[rsi],xmm3
\r
913 aeskeygenassist xmm2,xmm3,4h
\r
915 movdqa 96[rsi],xmm1
\r
916 aeskeygenassist xmm2,xmm1,0h
\r
918 movdqa 112[rsi],xmm3
\r
919 aeskeygenassist xmm2,xmm3,8h
\r
921 movdqa 128[rsi],xmm1
\r
922 aeskeygenassist xmm2,xmm1,0h
\r
924 movdqa 144[rsi],xmm3
\r
925 aeskeygenassist xmm2,xmm3,10h
\r
927 movdqa 160[rsi],xmm1
\r
928 aeskeygenassist xmm2,xmm1,0h
\r
930 movdqa 176[rsi],xmm3
\r
931 aeskeygenassist xmm2,xmm3,20h
\r
933 movdqa 192[rsi],xmm1
\r
935 aeskeygenassist xmm2,xmm1,0h
\r
937 movdqa 208[rsi],xmm3
\r
938 aeskeygenassist xmm2,xmm3,40h
\r
940 movdqa 224[rsi],xmm1
\r
942 ; restore non volatile rdi,rsi
\r
946 AES_256_Key_Expansion ENDP
\r
949 pshufd xmm2,xmm2,0ffh
\r
961 pshufd xmm2,xmm2,0aah
\r