# x86-gf2m.s revision 256281
	# $FreeBSD: stable/10/secure/lib/libcrypto/i386/x86-gf2m.s 238405 2012-07-12 19:30:53Z jkim $
#
# Syntax: GNU as, AT&T operand order (source first, destination last).
# Target: 32-bit x86; all pointers and words are 32 bits wide.
#
# Carry-less (polynomial over GF(2)) multiplication:
#   _mul_1x1_mmx    word x word -> 64 bits, MMX registers
#   _mul_1x1_ialu   word x word -> 64 bits, integer registers only
#   bn_GF2m_mul_2x2 two-word x two-word -> four words, exported entry point
#
# The instruction order inside the routines is hand-scheduled; keep it as is.
.file	"x86-gf2m.s"
.text

#-----------------------------------------------------------------------
# _mul_1x1_mmx -- file-local helper with a register calling convention
# In:    eax = a, ebx = b
# Out:   mm0 = 64-bit carry-less product a*b
# Clobb: ebx, ecx, edx, ebp, esi, edi, mm1-mm5, flags (eax is preserved)
# Stack: reserves 36 bytes; offsets 0..28 hold an 8-entry table
#        tab[i] = (a with its top two bits cleared) times i, carry-less.
# Bits 31 and 30 of a are left out of the table so that every entry
# fits in 32 bits; their contribution (b<<31, b<<30) is built in
# mm5/mm4 and folded in near the end.
# b is consumed three bits at a time: each window indexes the table and
# the entry is shifted left by the window position before being XORed
# into mm0.
#-----------------------------------------------------------------------
.type	_mul_1x1_mmx,@function
.align	16
_mul_1x1_mmx:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx	# edx = a<<1
	andl	$1073741823,%ecx	# ecx = a1 = a & 0x3fffffff
	leal	(%edx,%edx,1),%ebp	# ebp = a4 = a<<2 (truncated to 32 bits)
	movl	$0,(%esp)		# tab[0] = 0
	andl	$2147483647,%edx	# edx = a2 = (a<<1) & 0x7fffffff
	movd	%eax,%mm2		# mm2 = a
	movd	%ebx,%mm3		# mm3 = b
	movl	%ecx,4(%esp)		# tab[1] = a1
	xorl	%edx,%ecx
	pxor	%mm5,%mm5
	pxor	%mm4,%mm4
	movl	%edx,8(%esp)		# tab[2] = a2
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)		# tab[3] = a1^a2
	pcmpgtd	%mm2,%mm5		# mm5 low dword = all ones if bit 31 of a is set
	paddd	%mm2,%mm2		# mm2 = a<<1, moves bit 30 into the sign position
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)		# tab[4] = a4
	xorl	%edx,%ebp
	pand	%mm3,%mm5		# mm5 = b if bit 31 of a is set, else 0
	pcmpgtd	%mm2,%mm4		# mm4 low dword = all ones if bit 30 of a is set
	movl	%ecx,20(%esp)		# tab[5] = a1^a4
	xorl	%ecx,%ebp
	psllq	$31,%mm5		# mm5 = (b or 0) << 31
	pand	%mm3,%mm4		# mm4 = b if bit 30 of a is set, else 0
	movl	%edx,24(%esp)		# tab[6] = a2^a4
	movl	$7,%esi
	movl	%ebp,28(%esp)		# tab[7] = a1^a2^a4
	movl	%esi,%ebp		# ebp = 7, the window mask from here on
	andl	%ebx,%esi		# esi = b bits 0..2
	shrl	$3,%ebx
	movl	%ebp,%edi
	psllq	$30,%mm4		# mm4 = (b or 0) << 30
	andl	%ebx,%edi		# edi = b bits 3..5
	shrl	$3,%ebx
	movd	(%esp,%esi,4),%mm0	# mm0 = tab[window 0]
	movl	%ebp,%esi
	andl	%ebx,%esi
	shrl	$3,%ebx
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$3,%mm2
	andl	%ebx,%edi
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$6,%mm1
	andl	%ebx,%esi
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$9,%mm2
	andl	%ebx,%edi
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$12,%mm1
	andl	%ebx,%esi
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$15,%mm2
	andl	%ebx,%edi
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$18,%mm1
	andl	%ebx,%esi
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$21,%mm2
	andl	%ebx,%edi
	shrl	$3,%ebx
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$24,%mm1
	andl	%ebx,%esi		# esi = b bits 30..31, the last (two-bit) window
	shrl	$3,%ebx
	pxor	%mm1,%mm0
	movd	(%esp,%edi,4),%mm2
	pxor	%mm4,%mm0		# fold in the bit-30-of-a term
	psllq	$27,%mm2
	pxor	%mm2,%mm0
	movd	(%esp,%esi,4),%mm1
	pxor	%mm5,%mm0		# fold in the bit-31-of-a term
	psllq	$30,%mm1
	addl	$36,%esp
	pxor	%mm1,%mm0
	ret
.size	_mul_1x1_mmx,.-_mul_1x1_mmx

#-----------------------------------------------------------------------
# _mul_1x1_ialu -- file-local helper with a register calling convention
# In:    eax = a, ebx = b
# Out:   eax = low word, edx = high word of the carry-less product a*b
# Clobb: ebx, ecx, ebp, esi, edi, flags
# Stack: reserves 36 bytes; offsets 0..28 hold the same 8-entry table
#        as _mul_1x1_mmx (multiples of a with its top two bits cleared).
# Same three-bit windowing as the MMX variant, but each table entry is
# split by hand into a low part (shll) XORed into eax and a high part
# (shrl by 32 minus the window position) XORed into edx.
#-----------------------------------------------------------------------
.type	_mul_1x1_ialu,@function
.align	16
_mul_1x1_ialu:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx	# edx = a<<1
	leal	(,%eax,4),%ebp		# ebp = a4 = a<<2 (truncated to 32 bits)
	andl	$1073741823,%ecx	# ecx = a1 = a & 0x3fffffff
	leal	(%eax,%eax,1),%edi	# edi = a<<1, bit 30 of a now in the sign position
	sarl	$31,%eax		# eax = all ones if bit 31 of a is set, else 0
	movl	$0,(%esp)		# tab[0] = 0
	andl	$2147483647,%edx	# edx = a2 = (a<<1) & 0x7fffffff
	movl	%ecx,4(%esp)		# tab[1] = a1
	xorl	%edx,%ecx
	movl	%edx,8(%esp)		# tab[2] = a2
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)		# tab[3] = a1^a2
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)		# tab[4] = a4
	xorl	%edx,%ebp
	movl	%ecx,20(%esp)		# tab[5] = a1^a4
	xorl	%ecx,%ebp
	sarl	$31,%edi		# edi = all ones if bit 30 of a is set, else 0
	andl	%ebx,%eax		# eax = b if bit 31 of a is set, else 0
	movl	%edx,24(%esp)		# tab[6] = a2^a4
	andl	%ebx,%edi		# edi = b if bit 30 of a is set, else 0
	movl	%ebp,28(%esp)		# tab[7] = a1^a2^a4
	movl	%eax,%edx
	shll	$31,%eax		# eax = low word of (b or 0) << 31
	movl	%edi,%ecx
	shrl	$1,%edx			# edx = high word of (b or 0) << 31
	movl	$7,%esi
	shll	$30,%edi		# edi = low word of (b or 0) << 30
	andl	%ebx,%esi		# esi = b bits 0..2
	shrl	$2,%ecx			# ecx = high word of (b or 0) << 30
	xorl	%edi,%eax
	shrl	$3,%ebx
	movl	$7,%edi
	andl	%ebx,%edi		# edi = b bits 3..5
	shrl	$3,%ebx
	xorl	%ecx,%edx
	xorl	(%esp,%esi,4),%eax	# window 0 needs no shift and has no high part
	movl	$7,%esi
	andl	%ebx,%esi
	shrl	$3,%ebx
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$3,%ebp
	andl	%ebx,%edi
	shrl	$29,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$6,%ecx
	andl	%ebx,%esi
	shrl	$26,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$9,%ebp
	andl	%ebx,%edi
	shrl	$23,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$12,%ecx
	andl	%ebx,%esi
	shrl	$20,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$15,%ebp
	andl	%ebx,%edi
	shrl	$17,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$18,%ecx
	andl	%ebx,%esi
	shrl	$14,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$21,%ebp
	andl	%ebx,%edi
	shrl	$11,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$24,%ecx
	andl	%ebx,%esi		# esi = b bits 30..31, the last (two-bit) window
	shrl	$8,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx
	movl	(%esp,%edi,4),%ebp
	movl	%ebp,%ecx
	shll	$27,%ebp
	movl	(%esp,%esi,4),%edi
	shrl	$5,%ecx
	movl	%edi,%esi
	xorl	%ebp,%eax
	shll	$30,%edi
	xorl	%ecx,%edx
	shrl	$2,%esi
	xorl	%edi,%eax
	xorl	%esi,%edx
	addl	$36,%esp
	ret
.size	_mul_1x1_ialu,.-_mul_1x1_ialu

#-----------------------------------------------------------------------
# bn_GF2m_mul_2x2 -- exported; all arguments are read from the stack
# In (offsets at entry):
#        4(%esp)  = r,  pointer to four 32-bit result words
#        8(%esp)  = a1, 12(%esp) = a0   (high, low word of operand a)
#        16(%esp) = b1, 20(%esp) = b0   (high, low word of operand b)
# Out:   r[0..3] = 128-bit carry-less product (a1:a0)*(b1:b0), r[0] lowest
# Saves: ebp, ebx, esi, edi are pushed/popped on the MMX and integer paths
#        because the 1x1 helpers overwrite them.
#
# Three implementations, selected from the two words of OPENSSL_ia32cap_P:
#   word 0 bit 23 clear                 -> integer path (.L000ialu)
#   word 0 bit 24 or word 1 bit 1 clear -> MMX path (.L001mmx)
#   otherwise                           -> single carry-less multiply insn
# NOTE(review): the meaning of the OPENSSL_ia32cap_P bits is defined
# outside this file; the positions match CPUID leaf 1 EDX bit 23 (MMX),
# EDX bit 24 (FXSR) and ECX bit 1 (PCLMULQDQ) -- confirm against the
# code that fills the vector.
#
# The MMX and integer paths use Karatsuba: with H = a1*b1, L = a0*b0 and
# M = (a1^a0)*(b1^b0), the product is H<<64 ^ (M^H^L)<<32 ^ L.
#-----------------------------------------------------------------------
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
	leal	OPENSSL_ia32cap_P,%edx
	movl	(%edx),%eax		# eax = capability word 0
	movl	4(%edx),%edx		# edx = capability word 1
	testl	$8388608,%eax		# word 0, bit 23
	jz	.L000ialu
	testl	$16777216,%eax		# word 0, bit 24
	jz	.L001mmx
	testl	$2,%edx			# word 1, bit 1
	jz	.L001mmx
	movups	8(%esp),%xmm0		# xmm0 dwords = a1,a0,b1,b0
	shufps	$177,%xmm0,%xmm0	# swap dword pairs: a0,a1,b0,b1 -> qwords a, b
.byte	102,15,58,68,192,1		# encoding of pclmulqdq $1,%xmm0,%xmm0
	movl	4(%esp),%eax		# eax = r
	movups	%xmm0,(%eax)
	ret
.align	16
.L001mmx:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	# Four pushes moved the arguments up by 16 bytes: r is now at 20(%esp).
	movl	24(%esp),%eax		# a1
	movl	32(%esp),%ebx		# b1
	call	_mul_1x1_mmx
	movq	%mm0,%mm7		# mm7 = H = a1*b1
	movl	28(%esp),%eax		# a0
	movl	36(%esp),%ebx		# b0
	call	_mul_1x1_mmx
	movq	%mm0,%mm6		# mm6 = L = a0*b0
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	xorl	28(%esp),%eax		# eax = a1^a0
	xorl	36(%esp),%ebx		# ebx = b1^b0
	call	_mul_1x1_mmx		# mm0 = M
	pxor	%mm7,%mm0
	movl	20(%esp),%eax		# eax = r, loaded before the pops move esp
	pxor	%mm6,%mm0		# mm0 = M^H^L, the middle term
	movq	%mm0,%mm2
	psllq	$32,%mm0		# low half of the middle term, moved up 32 bits
	popl	%edi
	psrlq	$32,%mm2		# high half of the middle term, moved down 32 bits
	popl	%esi
	pxor	%mm6,%mm0		# mm0 = result bits 0..63
	popl	%ebx
	pxor	%mm7,%mm2		# mm2 = result bits 64..127
	movq	%mm0,(%eax)
	popl	%ebp
	movq	%mm2,8(%eax)
	emms
	ret
.align	16
.L000ialu:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$20,%esp
	# Pushes plus locals moved the arguments up by 36 bytes: r is now at
	# 40(%esp).  Locals: 0/4(%esp) = L low/high, 8/12(%esp) = H low/high.
	movl	44(%esp),%eax		# a1
	movl	52(%esp),%ebx		# b1
	call	_mul_1x1_ialu
	movl	%eax,8(%esp)		# H low
	movl	%edx,12(%esp)		# H high
	movl	48(%esp),%eax		# a0
	movl	56(%esp),%ebx		# b0
	call	_mul_1x1_ialu
	movl	%eax,(%esp)		# L low
	movl	%edx,4(%esp)		# L high
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	xorl	48(%esp),%eax		# eax = a1^a0
	xorl	56(%esp),%ebx		# ebx = b1^b0
	call	_mul_1x1_ialu		# eax:edx = M low:high
	movl	40(%esp),%ebp		# ebp = r
	movl	(%esp),%ebx		# ebx = L low
	movl	4(%esp),%ecx		# ecx = L high
	movl	8(%esp),%edi		# edi = H low
	movl	12(%esp),%esi		# esi = H high
	xorl	%edx,%eax
	xorl	%ecx,%edx
	xorl	%ebx,%eax
	movl	%ebx,(%ebp)		# r[0] = L low
	xorl	%edi,%edx
	movl	%esi,12(%ebp)		# r[3] = H high
	xorl	%esi,%eax
	addl	$20,%esp
	xorl	%esi,%edx		# edx = M high ^ L high ^ H low ^ H high
	popl	%edi
	xorl	%edx,%eax		# eax = M low ^ L low ^ L high ^ H low
	popl	%esi
	movl	%edx,8(%ebp)		# r[2]
	popl	%ebx
	movl	%eax,4(%ebp)		# r[1]
	popl	%ebp
	ret
.size	bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin

# NUL-terminated ASCII identification string:
# "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
.byte	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte	62,0

# Capability vector read by bn_GF2m_mul_2x2: common symbol, 8 bytes, 4-byte aligned.
.comm	OPENSSL_ia32cap_P,8,4