/* x86-gf2m.S revision 299389 */
# $FreeBSD: head/secure/lib/libcrypto/i386/x86-gf2m.S 299389 2016-05-10 20:31:09Z jkim $
/*
 * Carry-less (GF(2)[x]) multiplication for 32-bit x86.
 * Syntax: GNU as, AT&T operand order; the file is run through cpp.
 *
 * Exported:
 *   bn_GF2m_mul_2x2  - 64x64 -> 128 bit carry-less product, cdecl.
 * File-local helpers (register arguments, NOT cdecl):
 *   _mul_1x1_mmx     - 32x32 -> 64 bit product, result in %mm0.
 *   _mul_1x1_ialu    - 32x32 -> 64 bit product, result in %edx:%eax.
 *
 * The position-independent and absolute builds differ only in how the
 * address of OPENSSL_ia32cap_P is obtained, so that is the only part
 * placed under #ifdef PIC.
 */
.file	"x86-gf2m.S"
.text

/*
 * _mul_1x1_mmx
 * In:    %eax = a, %ebx = b
 * Out:   %mm0 = 64-bit carry-less product a*b
 * Clobb: %ebx, %ecx, %edx, %ebp, %esi, %edi, %mm1-%mm5, flags
 * Stack: 36 bytes reserved; 0..28(%esp) hold an 8-entry dword table
 *        tab[i] = (a with bits 31:30 cleared) * i, for i = 0..7.
 *        The caller must issue emms; this routine does not.
 *
 * b is consumed in 3-bit windows (ten full windows plus the top two
 * bits).  Bits 31 and 30 of a are left out of the table so that every
 * entry fits in 32 bits; their contribution is added separately as
 * (b << 31) and (b << 30), selected by the masks built in %mm5/%mm4.
 * Table construction, mask construction and the first window
 * extractions are interleaved instruction by instruction.
 */
.type	_mul_1x1_mmx,@function
.align	16
_mul_1x1_mmx:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx		/* edx = a << 1 */
	andl	$1073741823,%ecx		/* ecx = a & 0x3fffffff  (a1) */
	leal	(%edx,%edx,1),%ebp		/* ebp = a << 2          (a4) */
	movl	$0,(%esp)			/* tab[0] = 0 */
	andl	$2147483647,%edx		/* edx = (a<<1) & 0x7fffffff (a2) */
	movd	%eax,%mm2			/* mm2 = a, upper dword zero */
	movd	%ebx,%mm3			/* mm3 = b, upper dword zero */
	movl	%ecx,4(%esp)			/* tab[1] = a1 */
	xorl	%edx,%ecx
	pxor	%mm5,%mm5
	pxor	%mm4,%mm4
	movl	%edx,8(%esp)			/* tab[2] = a2 */
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)			/* tab[3] = a1^a2 */
	pcmpgtd	%mm2,%mm5			/* mm5 low dword = all-ones if bit 31 of a is set */
	paddd	%mm2,%mm2			/* mm2 = a << 1 */
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)			/* tab[4] = a4 */
	xorl	%edx,%ebp
	pand	%mm3,%mm5			/* mm5 = bit 31 of a ? b : 0 */
	pcmpgtd	%mm2,%mm4			/* mm4 low dword = all-ones if bit 30 of a is set */
	movl	%ecx,20(%esp)			/* tab[5] = a1^a4 */
	xorl	%ecx,%ebp
	psllq	$31,%mm5			/* mm5 = (bit 31 of a ? b : 0) << 31 */
	pand	%mm3,%mm4			/* mm4 = bit 30 of a ? b : 0 */
	movl	%edx,24(%esp)			/* tab[6] = a2^a4 */
	movl	$7,%esi
	movl	%ebp,28(%esp)			/* tab[7] = a1^a2^a4 */
	movl	%esi,%ebp			/* ebp = 7, window mask for the rest of the routine */
	andl	%ebx,%esi			/* esi = b[2:0] */
	shrl	$3,%ebx
	movl	%ebp,%edi
	psllq	$30,%mm4			/* mm4 = (bit 30 of a ? b : 0) << 30 */
	andl	%ebx,%edi			/* edi = b[5:3] */
	shrl	$3,%ebx

	/*
	 * Accumulate tab[window] << (3 * window index) into %mm0.  The index
	 * for a later window is extracted while the current one is
	 * being loaded and shifted; %esi and %edi alternate as index.
	 */
	movd	(%esp,%esi,4),%mm0		/* acc = tab[b[2:0]] */
	movl	%ebp,%esi
	andl	%ebx,%esi			/* esi = b[8:6] */
	shrl	$3,%ebx
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$3,%mm2
	andl	%ebx,%edi			/* edi = b[11:9] */
	shrl	$3,%ebx
	pxor	%mm2,%mm0			/* acc ^= tab[b[5:3]] << 3 */
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$6,%mm1
	andl	%ebx,%esi			/* esi = b[14:12] */
	shrl	$3,%ebx
	pxor	%mm1,%mm0			/* acc ^= tab[b[8:6]] << 6 */
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$9,%mm2
	andl	%ebx,%edi			/* edi = b[17:15] */
	shrl	$3,%ebx
	pxor	%mm2,%mm0			/* acc ^= tab[b[11:9]] << 9 */
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$12,%mm1
	andl	%ebx,%esi			/* esi = b[20:18] */
	shrl	$3,%ebx
	pxor	%mm1,%mm0			/* acc ^= tab[b[14:12]] << 12 */
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$15,%mm2
	andl	%ebx,%edi			/* edi = b[23:21] */
	shrl	$3,%ebx
	pxor	%mm2,%mm0			/* acc ^= tab[b[17:15]] << 15 */
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$18,%mm1
	andl	%ebx,%esi			/* esi = b[26:24] */
	shrl	$3,%ebx
	pxor	%mm1,%mm0			/* acc ^= tab[b[20:18]] << 18 */
	movd	(%esp,%edi,4),%mm2
	movl	%ebp,%edi
	psllq	$21,%mm2
	andl	%ebx,%edi			/* edi = b[29:27] */
	shrl	$3,%ebx
	pxor	%mm2,%mm0			/* acc ^= tab[b[23:21]] << 21 */
	movd	(%esp,%esi,4),%mm1
	movl	%ebp,%esi
	psllq	$24,%mm1
	andl	%ebx,%esi			/* esi = b[31:30], only two bits remain */
	shrl	$3,%ebx
	pxor	%mm1,%mm0			/* acc ^= tab[b[26:24]] << 24 */
	movd	(%esp,%edi,4),%mm2
	pxor	%mm4,%mm0			/* fold in the bit-30-of-a term */
	psllq	$27,%mm2
	pxor	%mm2,%mm0			/* acc ^= tab[b[29:27]] << 27 */
	movd	(%esp,%esi,4),%mm1
	pxor	%mm5,%mm0			/* fold in the bit-31-of-a term */
	psllq	$30,%mm1
	addl	$36,%esp
	pxor	%mm1,%mm0			/* acc ^= tab[b[31:30]] << 30 */
	ret
.size	_mul_1x1_mmx,.-_mul_1x1_mmx

/*
 * _mul_1x1_ialu
 * In:    %eax = a, %ebx = b
 * Out:   %edx:%eax = 64-bit carry-less product a*b (%eax = low dword)
 * Clobb: %ebx, %ecx, %ebp, %esi, %edi, flags
 * Stack: 36 bytes reserved; 0..28(%esp) hold the same 8-entry table as
 *        in _mul_1x1_mmx: tab[i] = (a with bits 31:30 cleared) * i.
 *
 * Integer-only variant of the windowed multiply.  Each table entry is
 * split by hand into a low part (shll by s, XORed into %eax) and a
 * high part (shrl by 32-s, XORed into %edx).
 */
.type	_mul_1x1_ialu,@function
.align	16
_mul_1x1_ialu:
	subl	$36,%esp
	movl	%eax,%ecx
	leal	(%eax,%eax,1),%edx		/* edx = a << 1 */
	leal	(,%eax,4),%ebp			/* ebp = a << 2          (a4) */
	andl	$1073741823,%ecx		/* ecx = a & 0x3fffffff  (a1) */
	leal	(%eax,%eax,1),%edi		/* edi = a << 1, bit 30 of a now in the sign bit */
	sarl	$31,%eax			/* eax = all-ones if bit 31 of a is set, else 0 */
	movl	$0,(%esp)			/* tab[0] = 0 */
	andl	$2147483647,%edx		/* edx = (a<<1) & 0x7fffffff (a2) */
	movl	%ecx,4(%esp)			/* tab[1] = a1 */
	xorl	%edx,%ecx
	movl	%edx,8(%esp)			/* tab[2] = a2 */
	xorl	%ebp,%edx
	movl	%ecx,12(%esp)			/* tab[3] = a1^a2 */
	xorl	%edx,%ecx
	movl	%ebp,16(%esp)			/* tab[4] = a4 */
	xorl	%edx,%ebp
	movl	%ecx,20(%esp)			/* tab[5] = a1^a4 */
	xorl	%ecx,%ebp
	sarl	$31,%edi			/* edi = all-ones if bit 30 of a is set, else 0 */
	andl	%ebx,%eax			/* eax = bit 31 of a ? b : 0 */
	movl	%edx,24(%esp)			/* tab[6] = a2^a4 */
	andl	%ebx,%edi			/* edi = bit 30 of a ? b : 0 */
	movl	%ebp,28(%esp)			/* tab[7] = a1^a2^a4 */

	/* Seed the accumulator with the two top-bit terms of a. */
	movl	%eax,%edx
	shll	$31,%eax			/* lo  = term31 << 31 */
	movl	%edi,%ecx
	shrl	$1,%edx				/* hi  = term31 >> 1 */
	movl	$7,%esi
	shll	$30,%edi
	andl	%ebx,%esi			/* esi = b[2:0] */
	shrl	$2,%ecx
	xorl	%edi,%eax			/* lo ^= term30 << 30 */
	shrl	$3,%ebx
	movl	$7,%edi
	andl	%ebx,%edi			/* edi = b[5:3] */
	shrl	$3,%ebx
	xorl	%ecx,%edx			/* hi ^= term30 >> 2 */

	/* Window 0 needs no shift and contributes nothing to hi. */
	xorl	(%esp,%esi,4),%eax		/* lo ^= tab[b[2:0]] */
	movl	$7,%esi
	andl	%ebx,%esi			/* esi = b[8:6] */
	shrl	$3,%ebx

	/* Window at shift 3. */
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$3,%ebp
	andl	%ebx,%edi			/* edi = b[11:9] */
	shrl	$29,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx

	/* Window at shift 6. */
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$6,%ecx
	andl	%ebx,%esi			/* esi = b[14:12] */
	shrl	$26,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx

	/* Window at shift 9. */
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$9,%ebp
	andl	%ebx,%edi			/* edi = b[17:15] */
	shrl	$23,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx

	/* Window at shift 12. */
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$12,%ecx
	andl	%ebx,%esi			/* esi = b[20:18] */
	shrl	$20,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx

	/* Window at shift 15. */
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$15,%ebp
	andl	%ebx,%edi			/* edi = b[23:21] */
	shrl	$17,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx

	/* Window at shift 18. */
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$18,%ecx
	andl	%ebx,%esi			/* esi = b[26:24] */
	shrl	$14,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx

	/* Window at shift 21. */
	movl	(%esp,%edi,4),%ebp
	movl	$7,%edi
	movl	%ebp,%ecx
	shll	$21,%ebp
	andl	%ebx,%edi			/* edi = b[29:27] */
	shrl	$11,%ecx
	xorl	%ebp,%eax
	shrl	$3,%ebx
	xorl	%ecx,%edx

	/* Window at shift 24. */
	movl	(%esp,%esi,4),%ecx
	movl	$7,%esi
	movl	%ecx,%ebp
	shll	$24,%ecx
	andl	%ebx,%esi			/* esi = b[31:30], only two bits remain */
	shrl	$8,%ebp
	xorl	%ecx,%eax
	shrl	$3,%ebx
	xorl	%ebp,%edx

	/* Windows at shift 27 and shift 30, interleaved. */
	movl	(%esp,%edi,4),%ebp
	movl	%ebp,%ecx
	shll	$27,%ebp
	movl	(%esp,%esi,4),%edi
	shrl	$5,%ecx
	movl	%edi,%esi
	xorl	%ebp,%eax
	shll	$30,%edi
	xorl	%ecx,%edx
	shrl	$2,%esi
	xorl	%edi,%eax
	xorl	%esi,%edx
	addl	$36,%esp
	ret
.size	_mul_1x1_ialu,.-_mul_1x1_ialu

/*
 * bn_GF2m_mul_2x2
 * Stack arguments on entry (after the return address):
 *    4(%esp) = r   pointer to four output dwords
 *    8(%esp) = a1  high dword of a
 *   12(%esp) = a0  low  dword of a
 *   16(%esp) = b1  high dword of b
 *   20(%esp) = b0  low  dword of b
 * Out:   r[0..3] = 128-bit carry-less product (a1:a0) * (b1:b0),
 *        least significant dword in r[0].
 * Saves and restores %ebp, %ebx, %esi, %edi on the two paths that use
 * them; %eax, %ecx, %edx are scratch.
 *
 * Dispatch on the first two dwords of OPENSSL_ia32cap_P:
 *   dword 0 bit 23 clear            -> integer path (.Lgf2m_ialu)
 *   dword 0 bit 24 clear, or
 *   dword 1 bit 1  clear            -> MMX path     (.Lgf2m_mmx)
 *   otherwise                       -> single pclmulqdq
 * Per the OPENSSL_ia32cap documentation these bits are the CPUID
 * MMX, FXSR and PCLMULQDQ feature flags respectively.
 *
 * The MMX and integer paths use three 32x32 products (Karatsuba):
 *   hi = a1*b1, lo = a0*b0, mid = (a0^a1)*(b0^b1) ^ hi ^ lo
 *   result = hi << 64  ^  mid << 32  ^  lo
 */
.globl	bn_GF2m_mul_2x2
.type	bn_GF2m_mul_2x2,@function
.align	16
bn_GF2m_mul_2x2:
.L_bn_GF2m_mul_2x2_begin:
#ifdef PIC
	/* Obtain our own address via call/pop, then form the address of
	 * the capability vector relative to it. */
	call	.Lgf2m_pic_me_up
.Lgf2m_pic_me_up:
	popl	%edx
	leal	OPENSSL_ia32cap_P-.Lgf2m_pic_me_up(%edx),%edx
#else
	leal	OPENSSL_ia32cap_P,%edx
#endif
	movl	(%edx),%eax			/* eax = capability dword 0 */
	movl	4(%edx),%edx			/* edx = capability dword 1 */
	testl	$8388608,%eax			/* 0x00800000, bit 23 */
	jz	.Lgf2m_ialu
	testl	$16777216,%eax			/* 0x01000000, bit 24 */
	jz	.Lgf2m_mmx
	testl	$2,%edx				/* bit 1 */
	jz	.Lgf2m_mmx

	/* Carry-less multiply instruction path. */
	movups	8(%esp),%xmm0			/* xmm0 dwords = a1, a0, b1, b0 */
	shufps	$177,%xmm0,%xmm0		/* 0xb1: swap dwords pairwise -> a0, a1, b0, b1 */
.byte	102,15,58,68,192,1			/* 66 0f 3a 44 c0 01 = pclmulqdq $1,%xmm0,%xmm0 */
	movl	4(%esp),%eax			/* eax = r */
	movups	%xmm0,(%eax)
	ret

.align	16
.Lgf2m_mmx:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* Four pushes: r is now at 20(%esp), a1/a0/b1/b0 at 24/28/32/36. */
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	call	_mul_1x1_mmx
	movq	%mm0,%mm7			/* mm7 = a1*b1 */
	movl	28(%esp),%eax
	movl	36(%esp),%ebx
	call	_mul_1x1_mmx
	movq	%mm0,%mm6			/* mm6 = a0*b0 */
	movl	24(%esp),%eax
	movl	32(%esp),%ebx
	xorl	28(%esp),%eax			/* eax = a1^a0 */
	xorl	36(%esp),%ebx			/* ebx = b1^b0 */
	call	_mul_1x1_mmx
	pxor	%mm7,%mm0
	movl	20(%esp),%eax			/* eax = r */
	pxor	%mm6,%mm0			/* mm0 = middle term */
	movq	%mm0,%mm2
	psllq	$32,%mm0			/* low half of middle term, moved up */
	popl	%edi
	psrlq	$32,%mm2			/* high half of middle term, moved down */
	popl	%esi
	pxor	%mm6,%mm0			/* low 64 bits of the result */
	popl	%ebx
	pxor	%mm7,%mm2			/* high 64 bits of the result */
	movq	%mm0,(%eax)
	popl	%ebp
	movq	%mm2,8(%eax)
	emms					/* leave MMX state before returning */
	ret

.align	16
.Lgf2m_ialu:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	subl	$20,%esp
	/* Four pushes plus 20 bytes of locals: r is now at 40(%esp),
	 * a1/a0/b1/b0 at 44/48/52/56.  Locals: 0/4 = a0*b0, 8/12 = a1*b1. */
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	call	_mul_1x1_ialu
	movl	%eax,8(%esp)			/* a1*b1 low */
	movl	%edx,12(%esp)			/* a1*b1 high */
	movl	48(%esp),%eax
	movl	56(%esp),%ebx
	call	_mul_1x1_ialu
	movl	%eax,(%esp)			/* a0*b0 low */
	movl	%edx,4(%esp)			/* a0*b0 high */
	movl	44(%esp),%eax
	movl	52(%esp),%ebx
	xorl	48(%esp),%eax			/* eax = a1^a0 */
	xorl	56(%esp),%ebx			/* ebx = b1^b0 */
	call	_mul_1x1_ialu
	movl	40(%esp),%ebp			/* ebp = r */
	movl	(%esp),%ebx
	movl	4(%esp),%ecx
	movl	8(%esp),%edi
	movl	12(%esp),%esi
	/* Combine the three partial products; the order below is the
	 * original schedule and each xor feeds the next. */
	xorl	%edx,%eax
	xorl	%ecx,%edx
	xorl	%ebx,%eax
	movl	%ebx,(%ebp)			/* r[0] = low dword of a0*b0 */
	xorl	%edi,%edx
	movl	%esi,12(%ebp)			/* r[3] = high dword of a1*b1 */
	xorl	%esi,%eax
	addl	$20,%esp
	xorl	%esi,%edx
	popl	%edi
	xorl	%edx,%eax
	popl	%esi
	movl	%edx,8(%ebp)			/* r[2] */
	popl	%ebx
	movl	%eax,4(%ebp)			/* r[1] */
	popl	%ebp
	ret
.size	bn_GF2m_mul_2x2,.-.L_bn_GF2m_mul_2x2_begin

/* NUL-terminated ASCII identification string:
 * "GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
.byte	99,97,116,105,111,110,32,102,111,114,32,120,56,54,44,32
.byte	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte	62,0

/* Common symbol for the capability vector: 16 bytes, 4-byte aligned. */
.comm	OPENSSL_ia32cap_P,16,4