rsaz-avx2.S (305153) | rsaz-avx2.S (326663) |
---|---|
1/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/rsaz-avx2.S 305153 2016-08-31 20:33:59Z jkim $ */ | 1/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/rsaz-avx2.S 326663 2017-12-07 18:04:48Z jkim $ */ |
2/* Do not modify. This file is auto-generated from rsaz-avx2.pl. */ 3.text 4 5.globl rsaz_1024_sqr_avx2 6.type rsaz_1024_sqr_avx2,@function 7.align 64 8rsaz_1024_sqr_avx2: 9 leaq (%rsp),%rax --- 53 unchanged lines hidden (view full) --- 63 vmovdqu 96-128(%rsi),%ymm3 64 vmovdqu 128-128(%rsi),%ymm4 65 vmovdqu 160-128(%rsi),%ymm5 66 vmovdqu 192-128(%rsi),%ymm6 67 vmovdqu 224-128(%rsi),%ymm7 68 vmovdqu 256-128(%rsi),%ymm8 69 70 leaq 192(%rsp),%rbx | 2/* Do not modify. This file is auto-generated from rsaz-avx2.pl. */ 3.text 4 5.globl rsaz_1024_sqr_avx2 6.type rsaz_1024_sqr_avx2,@function 7.align 64 8rsaz_1024_sqr_avx2: 9 leaq (%rsp),%rax --- 53 unchanged lines hidden (view full) --- 63 vmovdqu 96-128(%rsi),%ymm3 64 vmovdqu 128-128(%rsi),%ymm4 65 vmovdqu 160-128(%rsi),%ymm5 66 vmovdqu 192-128(%rsi),%ymm6 67 vmovdqu 224-128(%rsi),%ymm7 68 vmovdqu 256-128(%rsi),%ymm8 69 70 leaq 192(%rsp),%rbx |
71 vpbroadcastq .Land_mask(%rip),%ymm15 | 71 vmovdqu .Land_mask(%rip),%ymm15 |
72 jmp .LOOP_GRANDE_SQR_1024 73 74.align 32 75.LOOP_GRANDE_SQR_1024: 76 leaq 576+128(%rsp),%r9 77 leaq 448(%rsp),%r12 78 79 --- 716 unchanged lines hidden (view full) --- 796 vpaddq %ymm12,%ymm3,%ymm3 797 vpmuludq 128-128(%rcx),%ymm11,%ymm13 798 vpaddq %ymm13,%ymm4,%ymm4 799 vpmuludq 160-128(%rcx),%ymm11,%ymm0 800 vpaddq %ymm0,%ymm5,%ymm5 801 vpmuludq 192-128(%rcx),%ymm11,%ymm12 802 vpaddq %ymm12,%ymm6,%ymm6 803 vpmuludq 224-128(%rcx),%ymm11,%ymm13 | 72 jmp .LOOP_GRANDE_SQR_1024 73 74.align 32 75.LOOP_GRANDE_SQR_1024: 76 leaq 576+128(%rsp),%r9 77 leaq 448(%rsp),%r12 78 79 --- 716 unchanged lines hidden (view full) --- 796 vpaddq %ymm12,%ymm3,%ymm3 797 vpmuludq 128-128(%rcx),%ymm11,%ymm13 798 vpaddq %ymm13,%ymm4,%ymm4 799 vpmuludq 160-128(%rcx),%ymm11,%ymm0 800 vpaddq %ymm0,%ymm5,%ymm5 801 vpmuludq 192-128(%rcx),%ymm11,%ymm12 802 vpaddq %ymm12,%ymm6,%ymm6 803 vpmuludq 224-128(%rcx),%ymm11,%ymm13 |
804 vpblendd $3,%ymm14,%ymm9,%ymm9 | 804 vpblendd $3,%ymm14,%ymm9,%ymm12 |
805 vpaddq %ymm13,%ymm7,%ymm7 806 vpmuludq 256-128(%rcx),%ymm11,%ymm0 | 805 vpaddq %ymm13,%ymm7,%ymm7 806 vpmuludq 256-128(%rcx),%ymm11,%ymm0 |
807 vpaddq %ymm9,%ymm3,%ymm3 | 807 vpaddq %ymm12,%ymm3,%ymm3 |
808 vpaddq %ymm0,%ymm8,%ymm8 809 810 movq %rbx,%rax 811 imulq -128(%rsi),%rax 812 addq %rax,%r10 813 vmovdqu -8+32-128(%rsi),%ymm12 814 movq %rbx,%rax 815 imulq 8-128(%rsi),%rax 816 addq %rax,%r11 817 vmovdqu -8+64-128(%rsi),%ymm13 818 819 movq %r10,%rax | 808 vpaddq %ymm0,%ymm8,%ymm8 809 810 movq %rbx,%rax 811 imulq -128(%rsi),%rax 812 addq %rax,%r10 813 vmovdqu -8+32-128(%rsi),%ymm12 814 movq %rbx,%rax 815 imulq 8-128(%rsi),%rax 816 addq %rax,%r11 817 vmovdqu -8+64-128(%rsi),%ymm13 818 819 movq %r10,%rax |
820 vpblendd $0xfc,%ymm14,%ymm9,%ymm9 |
|
820 imull %r8d,%eax | 821 imull %r8d,%eax |
822 vpaddq %ymm9,%ymm4,%ymm4 |
|
821 andl $0x1fffffff,%eax 822 823 imulq 16-128(%rsi),%rbx 824 addq %rbx,%r12 825 vpmuludq %ymm10,%ymm12,%ymm12 826 vmovd %eax,%xmm11 827 vmovdqu -8+96-128(%rsi),%ymm0 828 vpaddq %ymm12,%ymm1,%ymm1 --- 212 unchanged lines hidden (view full) --- 1041 addq (%rsp),%r9 1042 vpaddq %ymm12,%ymm8,%ymm7 1043 vpmuludq %ymm11,%ymm13,%ymm13 1044 vmovq %r12,%xmm12 1045 vpaddq %ymm13,%ymm9,%ymm8 1046 1047 decl %r14d 1048 jnz .Loop_mul_1024 | 823 andl $0x1fffffff,%eax 824 825 imulq 16-128(%rsi),%rbx 826 addq %rbx,%r12 827 vpmuludq %ymm10,%ymm12,%ymm12 828 vmovd %eax,%xmm11 829 vmovdqu -8+96-128(%rsi),%ymm0 830 vpaddq %ymm12,%ymm1,%ymm1 --- 212 unchanged lines hidden (view full) --- 1043 addq (%rsp),%r9 1044 vpaddq %ymm12,%ymm8,%ymm7 1045 vpmuludq %ymm11,%ymm13,%ymm13 1046 vmovq %r12,%xmm12 1047 vpaddq %ymm13,%ymm9,%ymm8 1048 1049 decl %r14d 1050 jnz .Loop_mul_1024 |
1049 vpermq $0,%ymm15,%ymm15 | |
1050 vpaddq (%rsp),%ymm12,%ymm0 1051 1052 vpsrlq $29,%ymm0,%ymm12 1053 vpand %ymm15,%ymm0,%ymm0 1054 vpsrlq $29,%ymm1,%ymm13 1055 vpand %ymm15,%ymm1,%ymm1 1056 vpsrlq $29,%ymm2,%ymm10 1057 vpermq $0x93,%ymm12,%ymm12 --- 623 unchanged lines hidden (view full) --- 1681 cmovel %edx,%eax 1682 andl $32,%eax 1683 shrl $5,%eax 1684 .byte 0xf3,0xc3 1685.size rsaz_avx2_eligible,.-rsaz_avx2_eligible 1686 1687.align 64 1688.Land_mask: | 1051 vpaddq (%rsp),%ymm12,%ymm0 1052 1053 vpsrlq $29,%ymm0,%ymm12 1054 vpand %ymm15,%ymm0,%ymm0 1055 vpsrlq $29,%ymm1,%ymm13 1056 vpand %ymm15,%ymm1,%ymm1 1057 vpsrlq $29,%ymm2,%ymm10 1058 vpermq $0x93,%ymm12,%ymm12 --- 623 unchanged lines hidden (view full) --- 1682 cmovel %edx,%eax 1683 andl $32,%eax 1684 shrl $5,%eax 1685 .byte 0xf3,0xc3 1686.size rsaz_avx2_eligible,.-rsaz_avx2_eligible 1687 1688.align 64 1689.Land_mask: |
1689.quad 0x1fffffff,0x1fffffff,0x1fffffff,-1 | 1690.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff |
1690.Lscatter_permd: 1691.long 0,2,4,6,7,7,7,7 1692.Lgather_permd: 1693.long 0,7,1,7,2,7,3,7 1694.Linc: 1695.long 0,0,0,0, 1,1,1,1 1696.long 2,2,2,2, 3,3,3,3 1697.long 4,4,4,4, 4,4,4,4 1698.align 64 | 1691.Lscatter_permd: 1692.long 0,2,4,6,7,7,7,7 1693.Lgather_permd: 1694.long 0,7,1,7,2,7,3,7 1695.Linc: 1696.long 0,0,0,0, 1,1,1,1 1697.long 2,2,2,2, 3,3,3,3 1698.long 4,4,4,4, 4,4,4,4 1699.align 64 |