Deleted Added
full compact
rsaz-avx2.S (305153) rsaz-avx2.S (326663)
1/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/rsaz-avx2.S 305153 2016-08-31 20:33:59Z jkim $ */
1/* $FreeBSD: stable/11/secure/lib/libcrypto/amd64/rsaz-avx2.S 326663 2017-12-07 18:04:48Z jkim $ */
2/* Do not modify. This file is auto-generated from rsaz-avx2.pl. */
3.text
4
5.globl rsaz_1024_sqr_avx2
6.type rsaz_1024_sqr_avx2,@function
7.align 64
8rsaz_1024_sqr_avx2:
9 leaq (%rsp),%rax

--- 53 unchanged lines hidden (view full) ---

63 vmovdqu 96-128(%rsi),%ymm3
64 vmovdqu 128-128(%rsi),%ymm4
65 vmovdqu 160-128(%rsi),%ymm5
66 vmovdqu 192-128(%rsi),%ymm6
67 vmovdqu 224-128(%rsi),%ymm7
68 vmovdqu 256-128(%rsi),%ymm8
69
70 leaq 192(%rsp),%rbx
2/* Do not modify. This file is auto-generated from rsaz-avx2.pl. */
3.text
4
5.globl rsaz_1024_sqr_avx2
6.type rsaz_1024_sqr_avx2,@function
7.align 64
8rsaz_1024_sqr_avx2:
9 leaq (%rsp),%rax

--- 53 unchanged lines hidden (view full) ---

63 vmovdqu 96-128(%rsi),%ymm3
64 vmovdqu 128-128(%rsi),%ymm4
65 vmovdqu 160-128(%rsi),%ymm5
66 vmovdqu 192-128(%rsi),%ymm6
67 vmovdqu 224-128(%rsi),%ymm7
68 vmovdqu 256-128(%rsi),%ymm8
69
70 leaq 192(%rsp),%rbx
71 vpbroadcastq .Land_mask(%rip),%ymm15
71 vmovdqu .Land_mask(%rip),%ymm15
72 jmp .LOOP_GRANDE_SQR_1024
73
74.align 32
75.LOOP_GRANDE_SQR_1024:
76 leaq 576+128(%rsp),%r9
77 leaq 448(%rsp),%r12
78
79

--- 716 unchanged lines hidden (view full) ---

796 vpaddq %ymm12,%ymm3,%ymm3
797 vpmuludq 128-128(%rcx),%ymm11,%ymm13
798 vpaddq %ymm13,%ymm4,%ymm4
799 vpmuludq 160-128(%rcx),%ymm11,%ymm0
800 vpaddq %ymm0,%ymm5,%ymm5
801 vpmuludq 192-128(%rcx),%ymm11,%ymm12
802 vpaddq %ymm12,%ymm6,%ymm6
803 vpmuludq 224-128(%rcx),%ymm11,%ymm13
72 jmp .LOOP_GRANDE_SQR_1024
73
74.align 32
75.LOOP_GRANDE_SQR_1024:
76 leaq 576+128(%rsp),%r9
77 leaq 448(%rsp),%r12
78
79

--- 716 unchanged lines hidden (view full) ---

796 vpaddq %ymm12,%ymm3,%ymm3
797 vpmuludq 128-128(%rcx),%ymm11,%ymm13
798 vpaddq %ymm13,%ymm4,%ymm4
799 vpmuludq 160-128(%rcx),%ymm11,%ymm0
800 vpaddq %ymm0,%ymm5,%ymm5
801 vpmuludq 192-128(%rcx),%ymm11,%ymm12
802 vpaddq %ymm12,%ymm6,%ymm6
803 vpmuludq 224-128(%rcx),%ymm11,%ymm13
804 vpblendd $3,%ymm14,%ymm9,%ymm9
804 vpblendd $3,%ymm14,%ymm9,%ymm12
805 vpaddq %ymm13,%ymm7,%ymm7
806 vpmuludq 256-128(%rcx),%ymm11,%ymm0
805 vpaddq %ymm13,%ymm7,%ymm7
806 vpmuludq 256-128(%rcx),%ymm11,%ymm0
807 vpaddq %ymm9,%ymm3,%ymm3
807 vpaddq %ymm12,%ymm3,%ymm3
808 vpaddq %ymm0,%ymm8,%ymm8
809
810 movq %rbx,%rax
811 imulq -128(%rsi),%rax
812 addq %rax,%r10
813 vmovdqu -8+32-128(%rsi),%ymm12
814 movq %rbx,%rax
815 imulq 8-128(%rsi),%rax
816 addq %rax,%r11
817 vmovdqu -8+64-128(%rsi),%ymm13
818
819 movq %r10,%rax
808 vpaddq %ymm0,%ymm8,%ymm8
809
810 movq %rbx,%rax
811 imulq -128(%rsi),%rax
812 addq %rax,%r10
813 vmovdqu -8+32-128(%rsi),%ymm12
814 movq %rbx,%rax
815 imulq 8-128(%rsi),%rax
816 addq %rax,%r11
817 vmovdqu -8+64-128(%rsi),%ymm13
818
819 movq %r10,%rax
820 vpblendd $0xfc,%ymm14,%ymm9,%ymm9
820 imull %r8d,%eax
821 imull %r8d,%eax
822 vpaddq %ymm9,%ymm4,%ymm4
821 andl $0x1fffffff,%eax
822
823 imulq 16-128(%rsi),%rbx
824 addq %rbx,%r12
825 vpmuludq %ymm10,%ymm12,%ymm12
826 vmovd %eax,%xmm11
827 vmovdqu -8+96-128(%rsi),%ymm0
828 vpaddq %ymm12,%ymm1,%ymm1

--- 212 unchanged lines hidden (view full) ---

1041 addq (%rsp),%r9
1042 vpaddq %ymm12,%ymm8,%ymm7
1043 vpmuludq %ymm11,%ymm13,%ymm13
1044 vmovq %r12,%xmm12
1045 vpaddq %ymm13,%ymm9,%ymm8
1046
1047 decl %r14d
1048 jnz .Loop_mul_1024
823 andl $0x1fffffff,%eax
824
825 imulq 16-128(%rsi),%rbx
826 addq %rbx,%r12
827 vpmuludq %ymm10,%ymm12,%ymm12
828 vmovd %eax,%xmm11
829 vmovdqu -8+96-128(%rsi),%ymm0
830 vpaddq %ymm12,%ymm1,%ymm1

--- 212 unchanged lines hidden (view full) ---

1043 addq (%rsp),%r9
1044 vpaddq %ymm12,%ymm8,%ymm7
1045 vpmuludq %ymm11,%ymm13,%ymm13
1046 vmovq %r12,%xmm12
1047 vpaddq %ymm13,%ymm9,%ymm8
1048
1049 decl %r14d
1050 jnz .Loop_mul_1024
1049 vpermq $0,%ymm15,%ymm15
1050 vpaddq (%rsp),%ymm12,%ymm0
1051
1052 vpsrlq $29,%ymm0,%ymm12
1053 vpand %ymm15,%ymm0,%ymm0
1054 vpsrlq $29,%ymm1,%ymm13
1055 vpand %ymm15,%ymm1,%ymm1
1056 vpsrlq $29,%ymm2,%ymm10
1057 vpermq $0x93,%ymm12,%ymm12

--- 623 unchanged lines hidden (view full) ---

1681 cmovel %edx,%eax
1682 andl $32,%eax
1683 shrl $5,%eax
1684 .byte 0xf3,0xc3
1685.size rsaz_avx2_eligible,.-rsaz_avx2_eligible
1686
1687.align 64
1688.Land_mask:
1051 vpaddq (%rsp),%ymm12,%ymm0
1052
1053 vpsrlq $29,%ymm0,%ymm12
1054 vpand %ymm15,%ymm0,%ymm0
1055 vpsrlq $29,%ymm1,%ymm13
1056 vpand %ymm15,%ymm1,%ymm1
1057 vpsrlq $29,%ymm2,%ymm10
1058 vpermq $0x93,%ymm12,%ymm12

--- 623 unchanged lines hidden (view full) ---

1682 cmovel %edx,%eax
1683 andl $32,%eax
1684 shrl $5,%eax
1685 .byte 0xf3,0xc3
1686.size rsaz_avx2_eligible,.-rsaz_avx2_eligible
1687
1688.align 64
1689.Land_mask:
1689.quad 0x1fffffff,0x1fffffff,0x1fffffff,-1
1690.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
1690.Lscatter_permd:
1691.long 0,2,4,6,7,7,7,7
1692.Lgather_permd:
1693.long 0,7,1,7,2,7,3,7
1694.Linc:
1695.long 0,0,0,0, 1,1,1,1
1696.long 2,2,2,2, 3,3,3,3
1697.long 4,4,4,4, 4,4,4,4
1698.align 64
1691.Lscatter_permd:
1692.long 0,2,4,6,7,7,7,7
1693.Lgather_permd:
1694.long 0,7,1,7,2,7,3,7
1695.Linc:
1696.long 0,0,0,0, 1,1,1,1
1697.long 2,2,2,2, 3,3,3,3
1698.long 4,4,4,4, 4,4,4,4
1699.align 64