/* x86-gf2m.S revision 305153 */
/* $FreeBSD: stable/11/secure/lib/libcrypto/i386/x86-gf2m.S 305153 2016-08-31 20:33:59Z jkim $ */
/* Do not modify. This file is auto-generated from x86-gf2m.pl. */

/*
 * Carry-less (GF(2)[x]) multiplication for 32-bit x86, GNU as / AT&T syntax.
 *
 *   _mul_1x1_mmx    32x32 -> 64 bit product, MMX accumulator   (file-local)
 *   _mul_1x1_ialu   32x32 -> 64 bit product, integer registers (file-local)
 *   bn_GF2m_mul_2x2 64x64 -> 128 bit product                   (exported)
 *
 * The PIC and non-PIC builds differ only in how the address of
 * OPENSSL_ia32cap_P is formed at the top of bn_GF2m_mul_2x2, so only that
 * sequence is placed under the preprocessor conditional.
 *
 * The instruction order inside the two helpers is interleaved on purpose
 * (integer table/index work is mixed with the accumulate steps); it is
 * kept exactly as generated.
 */
.file	"x86-gf2m.S"
.text

/*
 * _mul_1x1_mmx
 *   In:     %eax = a, %ebx = b
 *   Out:    %mm0 = 64-bit carry-less product a*b
 *   Clobb:  %ebx, %ecx, %edx, %ebp, %esi, %edi, %mm1-%mm5, flags
 *   Stack:  36 bytes reserved; offsets 0..28 hold an 8-entry table
 *           tab[i] = i (x) (a & 0x3fffffff), i = 0..7, where (x) is the
 *           carry-less product.
 *
 * b is consumed in eleven 3-bit windows (the last one holds only bits
 * 30..31); each window selects a table entry that is shifted into place
 * and XORed into %mm0.  The two top bits of a are left out of the table
 * and are handled separately through %mm5 (bit 31) and %mm4 (bit 30).
 * The caller is responsible for executing emms.
 */
.type	_mul_1x1_mmx,@function
.align	16
_mul_1x1_mmx:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx
	andl	$1073741823,%ecx		/* a1 = a & 0x3fffffff */
	leal	(%edx,%edx,1),%ebp		/* a4 = a << 2 */
	movl	$0,(%esp)			/* tab[0] = 0 */
	andl	$2147483647,%edx		/* a2 = (a << 1) & 0x7fffffff */
	movd	%eax,%mm2			/* mm2 = a */
	movd	%ebx,%mm3			/* mm3 = b */
	movl	%ecx,4(%esp)			/* tab[1] = a1 */
	xorl	%edx,%ecx
	pxor	%mm5,%mm5
	pxor	%mm4,%mm4
	movl	%edx,8(%esp)			/* tab[2] = a2 */
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)			/* tab[3] = a1^a2 */
	pcmpgtd	%mm2,%mm5			/* low dword all-ones iff bit 31 of a set */
	paddd	%mm2,%mm2			/* move bit 30 of a into the sign position */
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)			/* tab[4] = a4 */
	xorl	%edx,%ebp
	pand	%mm3,%mm5			/* b if bit 31 of a set, else 0 */
	pcmpgtd	%mm2,%mm4			/* low dword all-ones iff bit 30 of a set */
	movl	%ecx,20(%esp)			/* tab[5] = a1^a4 */
	xorl	%ecx,%ebp
	psllq	$31,%mm5			/* contribution of bit 31 of a */
	pand	%mm3,%mm4			/* b if bit 30 of a set, else 0 */
	movl	%edx,24(%esp)			/* tab[6] = a2^a4 */
	movl	$7,%esi
	movl	%ebp,28(%esp)			/* tab[7] = a1^a2^a4 */
	movl	%esi,%ebp			/* ebp = 7, window mask from here on */
	andl	%ebx,%esi			/* window 0: b bits 0..2 */
	shrl	$3,%ebx
	movl	%ebp,%edi
	psllq	$30,%mm4			/* contribution of bit 30 of a */
	andl	%ebx,%edi			/* window 1: b bits 3..5 */
	shrl	$3,%ebx
	movd	(%esp,%esi,4),%mm0		/* acc = tab[window 0] */
	movl	%ebp,%esi
	andl	%ebx,%esi			/* window 2 */
	shrl	$3,%ebx
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$3,%mm2
	andl	%ebx,%edi			/* window 3 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0			/* acc ^= tab[window 1] << 3 */
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$6,%mm1
	andl	%ebx,%esi			/* window 4 */
	shrl	$3,%ebx
	pxor	%mm1,%mm0			/* acc ^= tab[window 2] << 6 */
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$9,%mm2
	andl	%ebx,%edi			/* window 5 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0			/* acc ^= tab[window 3] << 9 */
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$12,%mm1
	andl	%ebx,%esi			/* window 6 */
	shrl	$3,%ebx
	pxor	%mm1,%mm0			/* acc ^= tab[window 4] << 12 */
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$15,%mm2
	andl	%ebx,%edi			/* window 7 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0			/* acc ^= tab[window 5] << 15 */
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$18,%mm1
	andl	%ebx,%esi			/* window 8 */
	shrl	$3,%ebx
	pxor	%mm1,%mm0			/* acc ^= tab[window 6] << 18 */
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$21,%mm2
	andl	%ebx,%edi			/* window 9 */
	shrl	$3,%ebx
	pxor	%mm2,%mm0			/* acc ^= tab[window 7] << 21 */
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$24,%mm1
	andl	%ebx,%esi			/* window 10: b bits 30..31 */
	shrl	$3,%ebx
	pxor	%mm1,%mm0			/* acc ^= tab[window 8] << 24 */
	movd	(%esp,%edi,4),%mm2
	pxor	%mm4,%mm0			/* fold in bit 30 of a */
	psllq	$27,%mm2
	pxor	%mm2,%mm0			/* acc ^= tab[window 9] << 27 */
	movd	(%esp,%esi,4),%mm1
	pxor	%mm5,%mm0			/* fold in bit 31 of a */
	psllq	$30,%mm1
	addl	$36,%esp
	pxor	%mm1,%mm0			/* acc ^= tab[window 10] << 30 */
	ret
.size	_mul_1x1_mmx,.-_mul_1x1_mmx

/*
 * _mul_1x1_ialu
 *   In:     %eax = a, %ebx = b
 *   Out:    %edx:%eax = 64-bit carry-less product a*b (%eax = low word)
 *   Clobb:  %ebx, %ecx, %ebp, %esi, %edi, flags
 *   Stack:  36 bytes reserved; offsets 0..28 hold the same 8-entry table
 *           as in _mul_1x1_mmx: tab[i] = i (x) (a & 0x3fffffff).
 *
 * Same windowing scheme as the MMX variant, but every shifted table entry
 * is split by hand: (tab << 3k) goes into %eax and (tab >> (32 - 3k)) into
 * %edx.  Bits 31 and 30 of a are turned into all-ones/zero masks with sarl
 * and applied to b directly.
 */
.type	_mul_1x1_ialu,@function
.align	16
_mul_1x1_ialu:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx
	leal	(,%eax,4),%ebp			/* a4 = a << 2 */
	andl	$1073741823,%ecx		/* a1 = a & 0x3fffffff */
	leal	(%eax,%eax,1),%edi		/* bit 30 of a moved to the sign position */
	sarl	$31,%eax			/* mask31 = all-ones iff bit 31 of a set */
	movl	$0,(%esp)			/* tab[0] = 0 */
	andl	$2147483647,%edx		/* a2 = (a << 1) & 0x7fffffff */
	movl	%ecx,4(%esp)			/* tab[1] = a1 */
	xorl	%edx,%ecx
	movl	%edx,8(%esp)			/* tab[2] = a2 */
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)			/* tab[3] = a1^a2 */
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)			/* tab[4] = a4 */
	xorl	%edx,%ebp
	movl	%ecx,20(%esp)			/* tab[5] = a1^a4 */
	xorl	%ecx,%ebp
	sarl	$31,%edi			/* mask30 = all-ones iff bit 30 of a set */
	andl	%ebx,%eax			/* b & mask31 */
	movl	%edx,24(%esp)			/* tab[6] = a2^a4 */
	andl	%ebx,%edi			/* b & mask30 */
	movl	%ebp,28(%esp)			/* tab[7] = a1^a2^a4 */
	movl	%eax,%edx
	shll	$31,%eax			/* lo  = (b & mask31) << 31 */
	movl	%edi,%ecx
	shrl	$1,%edx				/* hi  = (b & mask31) >> 1 */
	movl	$7,%esi
	shll	$30,%edi
	andl	%ebx,%esi			/* window 0: b bits 0..2 */
	shrl	$2,%ecx
	xorl	%edi,%eax			/* lo ^= (b & mask30) << 30 */
	shrl	$3,%ebx
	movl	$7,%edi
	andl	%ebx,%edi			/* window 1 */
	shrl	$3,%ebx
	xorl	%ecx,%edx			/* hi ^= (b & mask30) >> 2 */
	xorl	(%esp,%esi,4),%eax		/* lo ^= tab[window 0] */
	movl	$7,%esi
	andl	%ebx,%esi			/* window 2 */
	shrl	$3,%ebx
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$3,%ebp
	andl	%ebx,%edi			/* window 3 */
	shrl	$29,%ecx
	xorl	%ebp,%eax			/* lo ^= tab[window 1] << 3 */
	shrl	$3,%ebx
	xorl	%ecx,%edx			/* hi ^= tab[window 1] >> 29 */
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$6,%ecx
	andl	%ebx,%esi			/* window 4 */
	shrl	$26,%ebp
	xorl	%ecx,%eax			/* lo ^= tab[window 2] << 6 */
	shrl	$3,%ebx
	xorl	%ebp,%edx			/* hi ^= tab[window 2] >> 26 */
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$9,%ebp
	andl	%ebx,%edi			/* window 5 */
	shrl	$23,%ecx
	xorl	%ebp,%eax			/* lo ^= tab[window 3] << 9 */
	shrl	$3,%ebx
	xorl	%ecx,%edx			/* hi ^= tab[window 3] >> 23 */
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$12,%ecx
	andl	%ebx,%esi			/* window 6 */
	shrl	$20,%ebp
	xorl	%ecx,%eax			/* lo ^= tab[window 4] << 12 */
	shrl	$3,%ebx
	xorl	%ebp,%edx			/* hi ^= tab[window 4] >> 20 */
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$15,%ebp
	andl	%ebx,%edi			/* window 7 */
	shrl	$17,%ecx
	xorl	%ebp,%eax			/* lo ^= tab[window 5] << 15 */
	shrl	$3,%ebx
	xorl	%ecx,%edx			/* hi ^= tab[window 5] >> 17 */
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$18,%ecx
	andl	%ebx,%esi			/* window 8 */
	shrl	$14,%ebp
	xorl	%ecx,%eax			/* lo ^= tab[window 6] << 18 */
	shrl	$3,%ebx
	xorl	%ebp,%edx			/* hi ^= tab[window 6] >> 14 */
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$21,%ebp
	andl	%ebx,%edi			/* window 9 */
	shrl	$11,%ecx
	xorl	%ebp,%eax			/* lo ^= tab[window 7] << 21 */
	shrl	$3,%ebx
	xorl	%ecx,%edx			/* hi ^= tab[window 7] >> 11 */
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$24,%ecx
	andl	%ebx,%esi			/* window 10: b bits 30..31 */
	shrl	$8,%ebp
	xorl	%ecx,%eax			/* lo ^= tab[window 8] << 24 */
	shrl	$3,%ebx
	xorl	%ebp,%edx			/* hi ^= tab[window 8] >> 8 */
	movl	(%esp,%edi,4),%ebp
	movl	%ebp,%ecx
	shll	$27,%ebp
	movl	(%esp,%esi,4),%edi
	shrl	$5,%ecx
	movl	%edi,%esi
	xorl	%ebp,%eax			/* lo ^= tab[window 9] << 27 */
	shll	$30,%edi
	xorl	%ecx,%edx			/* hi ^= tab[window 9] >> 5 */
	shrl	$2,%esi
	xorl	%edi,%eax			/* lo ^= tab[window 10] << 30 */
	xorl	%esi,%edx			/* hi ^= tab[window 10] >> 2 */
	addl	$36,%esp
	ret
.size	_mul_1x1_ialu,.-_mul_1x1_ialu

/*
 * bn_GF2m_mul_2x2
 *   Stack arguments on entry (32-bit words):
 *      4(%esp)  r   pointer to a 16-byte result buffer
 *      8(%esp)  high word of the first operand
 *     12(%esp)  low  word of the first operand
 *     16(%esp)  high word of the second operand
 *     20(%esp)  low  word of the second operand
 *   Result: r[0..3] = 128-bit carry-less product, least significant word
 *           first.  %ebp, %ebx, %esi and %edi are preserved.
 *   NOTE(review): presumably this matches OpenSSL's C prototype
 *     void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0,
 *                          BN_ULONG b1, BN_ULONG b0);
 *   confirm against the C caller.
 *
 * Dispatch on the first two words of OPENSSL_ia32cap_P:
 *   word 0 bit 23 clear                      -> integer path
 *   word 0 bit 24 clear or word 1 bit 1 clear -> MMX path
 *   otherwise                                -> single PCLMULQDQ
 * (Bits presumably mean MMX, FXSR and PCLMULQDQ as laid out in the
 * OPENSSL_ia32cap documentation - confirm.)
 *
 * The integer and MMX paths use three 1x1 multiplies (Karatsuba):
 *   hh = hi*hi, ll = lo*lo, mm = (hi^lo)*(hi^lo) ^ hh ^ ll
 *   result = hh << 64 ^ mm << 32 ^ ll
 */
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
#ifdef PIC
	call	.Lgf2m_pic_base			/* call/pop: fetch own address into %edx */
.Lgf2m_pic_base:
	popl	%edx
	leal	OPENSSL_ia32cap_P-.Lgf2m_pic_base(%edx),%edx
#else
	leal	OPENSSL_ia32cap_P,%edx
#endif
	movl	(%edx),%eax			/* capability word 0 */
	movl	4(%edx),%edx			/* capability word 1 */
	testl	$8388608,%eax			/* 1 << 23 */
	jz	.Lgf2m_ialu
	testl	$16777216,%eax			/* 1 << 24 */
	jz	.Lgf2m_mmx
	testl	$2,%edx				/* 1 << 1 */
	jz	.Lgf2m_mmx
	movups	8(%esp),%xmm0			/* the four operand words */
	shufps	$177,%xmm0,%xmm0		/* swap words inside each 64-bit half: low word first */
.byte	102,15,58,68,192,1			/* pclmulqdq $1,%xmm0,%xmm0 */
	movl	4(%esp),%eax			/* r */
	movups	%xmm0,(%eax)
	ret
.align	16
.Lgf2m_mmx:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* After four pushes: 20(%esp)=r, 24/28 = first hi/lo, 32/36 = second hi/lo. */
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	call	_mul_1x1_mmx
	movq	%mm0,%mm7			/* mm7 = hh */
	movl	28(%esp),%eax
	movl	36(%esp),%ebx
	call	_mul_1x1_mmx
	movq	%mm0,%mm6			/* mm6 = ll */
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	xorl	28(%esp),%eax
	xorl	36(%esp),%ebx
	call	_mul_1x1_mmx
	pxor	%mm7,%mm0
	movl	20(%esp),%eax			/* r */
	pxor	%mm6,%mm0			/* mm0 = mm (middle term) */
	movq	%mm0,%mm2
	psllq	$32,%mm0
	popl	%edi
	psrlq	$32,%mm2
	popl	%esi
	pxor	%mm6,%mm0			/* low 64 bits  = ll ^ (mm << 32) */
	popl	%ebx
	pxor	%mm7,%mm2			/* high 64 bits = hh ^ (mm >> 32) */
	movq	%mm0,(%eax)
	popl	%ebp
	movq	%mm2,8(%eax)
	emms					/* leave MMX state before returning */
	ret
.align	16
.Lgf2m_ialu:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$20,%esp
	/* After four pushes and 20 bytes of locals:
	 * 40(%esp)=r, 44/48 = first hi/lo, 52/56 = second hi/lo. */
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	call	_mul_1x1_ialu
	movl	%eax,8(%esp)			/* hh low  */
	movl	%edx,12(%esp)			/* hh high */
	movl	48(%esp),%eax
	movl	56(%esp),%ebx
	call	_mul_1x1_ialu
	movl	%eax,(%esp)			/* ll low  */
	movl	%edx,4(%esp)			/* ll high */
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	xorl	48(%esp),%eax
	xorl	56(%esp),%ebx
	call	_mul_1x1_ialu			/* edx:eax = (hi^lo)*(hi^lo) */
	movl	40(%esp),%ebp			/* r */
	movl	(%esp),%ebx			/* ll low  */
	movl	4(%esp),%ecx			/* ll high */
	movl	8(%esp),%edi			/* hh low  */
	movl	12(%esp),%esi			/* hh high */
	xorl	%edx,%eax
	xorl	%ecx,%edx
	xorl	%ebx,%eax
	movl	%ebx,(%ebp)			/* r[0] = ll low */
	xorl	%edi,%edx
	movl	%esi,12(%ebp)			/* r[3] = hh high */
	xorl	%esi,%eax
	addl	$20,%esp
	xorl	%esi,%edx
	popl	%edi
	xorl	%edx,%eax
	popl	%esi
	movl	%edx,8(%ebp)			/* r[2] */
	popl	%ebx
	movl	%eax,4(%ebp)			/* r[1] */
	popl	%ebp
	ret
.size	bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin

/* ASCII: "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>\0" */
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
.byte	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte	62,0

/* Capability vector: 16 bytes, 4-byte aligned common symbol. */
.comm	OPENSSL_ia32cap_P,16,4