/* poly1305-x86.S revision 1.3 */
/*
 * Poly1305 for 32-bit x86 -- GAS / AT&T syntax, ELF.
 *
 * All externally visible routines take their arguments on the stack and
 * save/restore %ebp, %ebx, %esi and %edi.
 *
 * Context layout as used by the code in this file (byte offsets from ctx):
 *     0..19   five 32-bit accumulator words
 *    20       flag word: 0 = accumulator is four 32-bit limbs + top word,
 *             non-zero = accumulator is five 26-bit limbs (set by the SSE2
 *             block routine on its first vectorised call)
 *    24..39   four 32-bit words of the masked first key half
 *    48..191  nine 16-byte rows written by _poly1305_init_sse2
 *
 * Entry points:
 *    poly1305_init(ctx, key, func[2])    global
 *    poly1305_blocks(ctx, inp, len, padbit)   global
 *    poly1305_emit(ctx, mac, nonce)      global
 *    _poly1305_blocks_sse2 / _poly1305_emit_sse2   file-local, reached only
 *        through the function-pointer pair stored by poly1305_init
 *    _poly1305_init_sse2                 file-local helper, register ABI
 */
#include <machine/asm.h>
.text
.align	64

/*
 * int poly1305_init(void *ctx, const unsigned char *key, void *func[2])
 *
 * In:   20(%esp) after the four pushes = ctx, 24(%esp) = key, 28(%esp) = func
 * Out:  %eax = 0 when key is NULL (only the first 24 bytes of ctx are
 *       zeroed), otherwise 1.
 *
 * With a non-NULL key the routine
 *   - stores a (blocks, emit) function-pointer pair to func[0], func[1];
 *     the *_sse2 pair is chosen when bits 24 and 26 (mask 0x05000000) of the
 *     first word of OPENSSL_ia32cap_P are both set, otherwise the scalar
 *     pair;
 *   - copies key[0..15] into ctx+24 masked with
 *     0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc.
 */
.globl	poly1305_init
.type	poly1305_init,@function
.align	16
poly1305_init:
.L_poly1305_init_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		/* ctx */
	movl	24(%esp),%esi		/* key */
	movl	28(%esp),%ebp		/* func */
	xorl	%eax,%eax
	movl	%eax,(%edi)		/* zero accumulator words 0..4 */
	movl	%eax,4(%edi)
	movl	%eax,8(%edi)
	movl	%eax,12(%edi)
	movl	%eax,16(%edi)
	movl	%eax,20(%edi)		/* zero the representation flag */
	cmpl	$0,%esi
	je	.L000nokey		/* NULL key: return 0 (eax still 0) */
	call	.L001pic_point		/* position-independent address of self */
.L001pic_point:
	popl	%ebx
	leal	poly1305_blocks-.L001pic_point(%ebx),%eax
	leal	poly1305_emit-.L001pic_point(%ebx),%edx
	leal	OPENSSL_ia32cap_P-.L001pic_point(%ebx),%edi
	movl	(%edi),%ecx
	andl	$83886080,%ecx		/* 0x05000000: bits 24 and 26 */
	cmpl	$83886080,%ecx
	jne	.L002no_sse2		/* both bits required for the SSE2 pair */
	leal	_poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
	leal	_poly1305_emit_sse2-.L001pic_point(%ebx),%edx
.L002no_sse2:
	movl	20(%esp),%edi		/* reload ctx (edi was reused above) */
	movl	%eax,(%ebp)		/* func[0] = blocks routine */
	movl	%edx,4(%ebp)		/* func[1] = emit routine */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	andl	$268435455,%eax		/* 0x0fffffff */
	andl	$268435452,%ebx		/* 0x0ffffffc */
	andl	$268435452,%ecx		/* 0x0ffffffc */
	andl	$268435452,%edx		/* 0x0ffffffc */
	movl	%eax,24(%edi)
	movl	%ebx,28(%edi)
	movl	%ecx,32(%edi)
	movl	%edx,36(%edi)
	movl	$1,%eax
.L000nokey:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_init,.-.L_poly1305_init_begin

/*
 * void poly1305_blocks(void *ctx, const unsigned char *inp,
 *                      size_t len, unsigned int padbit)
 *
 * Scalar block routine.  Consumes len rounded DOWN to a multiple of 16
 * bytes; returns without touching ctx when that is zero.  For each 16-byte
 * block: adds the block (plus padbit as the fifth word) into the
 * accumulator with carry, multiplies by the key words stored at ctx+24,
 * and folds the part above bit 2 of the top word back in times 5.
 *
 * .Lenter_blocks is also entered from _poly1305_blocks_sse2 with
 *   %edi = ctx, %esi = inp, %ecx = len and the same four registers pushed.
 *
 * Stack frame after "subl $64,%esp":
 *     0(%esp), 12(%esp), 16(%esp)  saved accumulator words 0, 3, 4
 *    20..28(%esp)                  low three result words of the product
 *    36..48(%esp)                  key words k0..k3 (from ctx+24..36)
 *    52..60(%esp)                  k1+(k1>>2), k2+(k2>>2), k3+(k3>>2)
 *    84(%esp)                      ctx argument
 *    92(%esp)                      end-of-input pointer (overwrites the
 *                                  len argument slot)
 *    96(%esp)                      padbit argument
 */
.globl	poly1305_blocks
.type	poly1305_blocks,@function
.align	16
poly1305_blocks:
.L_poly1305_blocks_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		/* ctx */
	movl	24(%esp),%esi		/* inp */
	movl	28(%esp),%ecx		/* len */
.Lenter_blocks:
	/*
	 * Round len down to whole 16-byte blocks.  The mask must clear all
	 * four low bits: the loop below terminates on pointer EQUALITY with
	 * inp+len while inp advances by 16, so any surviving low bit would
	 * make the end pointer unreachable and the loop would overrun.
	 */
	andl	$-16,%ecx
	jz	.L003nodata
	subl	$64,%esp
	movl	24(%edi),%eax		/* k0 */
	movl	28(%edi),%ebx		/* k1 */
	leal	(%esi,%ecx,1),%ebp	/* end = inp + len */
	movl	32(%edi),%ecx		/* k2 */
	movl	36(%edi),%edx		/* k3 */
	movl	%ebp,92(%esp)		/* save end pointer */
	movl	%esi,%ebp		/* ebp = running input pointer */
	movl	%eax,36(%esp)
	movl	%ebx,%eax
	shrl	$2,%eax
	movl	%ebx,40(%esp)
	addl	%ebx,%eax		/* k1 + (k1 >> 2) */
	movl	%ecx,%ebx
	shrl	$2,%ebx
	movl	%ecx,44(%esp)
	addl	%ecx,%ebx		/* k2 + (k2 >> 2) */
	movl	%edx,%ecx
	shrl	$2,%ecx
	movl	%edx,48(%esp)
	addl	%edx,%ecx		/* k3 + (k3 >> 2) */
	movl	%eax,52(%esp)
	movl	%ebx,56(%esp)
	movl	%ecx,60(%esp)
	movl	(%edi),%eax		/* load accumulator words 0..4 */
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%esi
	movl	16(%edi),%edi
	jmp	.L004loop
.align	32
.L004loop:
	/* accumulator += block, fifth word += padbit + carry */
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%esi
	leal	16(%ebp),%ebp		/* advance input; lea keeps CF intact */
	adcl	96(%esp),%edi
	movl	%eax,(%esp)
	movl	%esi,12(%esp)
	/* result word 0 (accumulated in esi:edi, stored to 20(%esp)) */
	mull	36(%esp)
	movl	%edi,16(%esp)
	movl	%eax,%edi
	movl	%ebx,%eax
	movl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	56(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	52(%esp)
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	%edx,%esi
	/* result word 1 (stored to 24(%esp)) */
	mull	40(%esp)
	movl	%edi,20(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	60(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	56(%esp)
	addl	%eax,%esi
	movl	16(%esp),%eax
	adcl	%edx,%edi
	imull	52(%esp),%eax
	addl	%eax,%esi
	movl	(%esp),%eax
	adcl	$0,%edi
	/* result word 2 (stored to 28(%esp)) */
	mull	44(%esp)
	movl	%esi,24(%esp)
	xorl	%esi,%esi
	addl	%eax,%edi
	movl	%ebx,%eax
	adcl	%edx,%esi
	mull	40(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	36(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	16(%esp),%eax
	adcl	%edx,%esi
	imull	56(%esp),%eax
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	$0,%esi
	/* result word 3 (kept in esi) and top word (built in edx) */
	mull	48(%esp)
	movl	%edi,28(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	44(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	40(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	16(%esp),%ecx
	adcl	%edx,%edi
	movl	%ecx,%edx
	imull	60(%esp),%ecx
	addl	%ecx,%esi
	movl	20(%esp),%eax		/* reload result word 0 */
	adcl	$0,%edi
	imull	36(%esp),%edx
	addl	%edi,%edx
	movl	24(%esp),%ebx		/* reload result word 1 */
	movl	28(%esp),%ecx		/* reload result word 2 */
	/* keep the low 2 bits of the top word, fold the rest back in as *5 */
	movl	%edx,%edi
	shrl	$2,%edx
	andl	$3,%edi
	leal	(%edx,%edx,4),%edx	/* (top >> 2) * 5 */
	addl	%edx,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%esi
	adcl	$0,%edi
	cmpl	92(%esp),%ebp		/* reached end of input? */
	jne	.L004loop
	movl	84(%esp),%edx		/* ctx */
	addl	$64,%esp
	movl	%eax,(%edx)		/* write accumulator back */
	movl	%ebx,4(%edx)
	movl	%ecx,8(%edx)
	movl	%esi,12(%edx)
	movl	%edi,16(%edx)
.L003nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_blocks,.-.L_poly1305_blocks_begin

/*
 * void poly1305_emit(void *ctx, unsigned char mac[16],
 *                    const unsigned int nonce[4])
 *
 * Reads the five accumulator words from ctx, computes accumulator+5 with
 * carry, and uses bit 2 of the resulting top word to select (branch-free,
 * via an all-ones/all-zero mask) either the +5 value or the original low
 * four words.  The four nonce words are then added with carry and the
 * 16-byte result is stored to mac.  ctx is not modified.
 *
 * .Lenter_emit is also entered from _poly1305_emit_sse2 with %ebp = ctx
 * and the same four registers pushed.
 */
.globl	poly1305_emit
.type	poly1305_emit,@function
.align	16
poly1305_emit:
.L_poly1305_emit_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp		/* ctx */
.Lenter_emit:
	movl	24(%esp),%edi		/* mac */
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	addl	$5,%eax			/* accumulator + 5 */
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	shrl	$2,%esi			/* non-zero iff the sum reached bit 130 */
	negl	%esi			/* mask from the top word after >>2, negated */
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)		/* park masked +5 value in mac */
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	notl	%esi			/* complementary mask */
	movl	(%ebp),%eax		/* original accumulator words */
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	28(%esp),%ebp		/* nonce */
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	orl	(%edi),%eax		/* merge the two masked candidates */
	orl	4(%edi),%ebx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx
	addl	(%ebp),%eax		/* + nonce, carry discarded past word 3 */
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_emit,.-.L_poly1305_emit_begin

/*
 * _poly1305_init_sse2 -- file-local helper, register calling convention.
 *
 * In:   %edi = ctx, %ebx = address of .Lconst_sse2
 * Out:  nine 16-byte rows stored at ctx+48 .. ctx+191;
 *       %edi restored to ctx; %xmm7 = 26-bit mask row (64(%ebx))
 * Uses: %ecx, %edx, %ebp (as saved stack pointer), %xmm0-%xmm7, and a
 *       224-byte 16-aligned scratch area below the caller's %esp.
 *       %ebp is NOT preserved.
 *
 * The 16 key bytes at ctx+24 are split into five 26-bit limbs, then the
 * multiply-and-carry pass at .L005square is run twice (ecx = 2), each pass
 * multiplying the running value by its previous contents.  The results of
 * the passes are interleaved with the original limbs and stored, followed
 * by four rows holding limbs 1..4 multiplied by 5 (x*4 + x).
 * NOTE(review): presumably these rows are the key powers consumed by
 * _poly1305_blocks_sse2 -- confirm against the generator script.
 */
.align	32
.type	_poly1305_init_sse2,@function
.align	16
_poly1305_init_sse2:
	movdqu	24(%edi),%xmm4		/* masked key words from ctx */
	leal	48(%edi),%edi		/* edi -> table area */
	movl	%esp,%ebp		/* remember caller's stack pointer */
	subl	$224,%esp
	andl	$-16,%esp		/* movdqa below needs 16-byte alignment */
	movq	64(%ebx),%xmm7		/* 0x3ffffff mask (low qword only) */
	/* split 128 bits into five 26-bit limbs xmm0..xmm4 */
	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	movdqa	%xmm4,%xmm2
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm1
	psrldq	$6,%xmm2
	pand	%xmm7,%xmm1
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	psrldq	$13,%xmm4
	leal	144(%esp),%edx		/* second scratch area */
	movl	$2,%ecx			/* two multiply passes */
.L005square:
	/* save current limbs */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	/* limbs 1..4 times 5, stored at 80..128(%esp) */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqa	%xmm6,80(%esp)
	movdqa	%xmm5,96(%esp)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqa	%xmm6,112(%esp)
	movdqa	%xmm5,128(%esp)
	/* broadcast the low qword of each limb into (%edx).. as multiplier */
	pshufd	$68,%xmm0,%xmm6
	movdqa	%xmm1,%xmm5
	pshufd	$68,%xmm1,%xmm1
	pshufd	$68,%xmm2,%xmm2
	pshufd	$68,%xmm3,%xmm3
	pshufd	$68,%xmm4,%xmm4
	movdqa	%xmm6,(%edx)
	movdqa	%xmm1,16(%edx)
	movdqa	%xmm2,32(%edx)
	movdqa	%xmm3,48(%edx)
	movdqa	%xmm4,64(%edx)
	/* 5x5 limb product accumulated into xmm0..xmm4 */
	pmuludq	%xmm0,%xmm4
	pmuludq	%xmm0,%xmm3
	pmuludq	%xmm0,%xmm2
	pmuludq	%xmm0,%xmm1
	pmuludq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	80(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%edx),%xmm6
	movdqa	32(%esp),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	96(%esp),%xmm7
	pmuludq	(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%edx),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%edx),%xmm5
	movdqa	48(%esp),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	112(%esp),%xmm5
	pmuludq	(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%edx),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%edx),%xmm7
	movdqa	64(%esp),%xmm5
	paddq	%xmm6,%xmm1
	movdqa	128(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%edx),%xmm6
	movdqa	64(%ebx),%xmm7		/* reload 26-bit mask */
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* carry pass: mask each limb to 26 bits, push overflow upward;
	 * overflow out of limb 4 re-enters limb 0 as x + (x << 2) */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	decl	%ecx
	jz	.L006square_break
	/* first pass done: pair new limbs with the saved ones and go again */
	punpcklqdq	(%esp),%xmm0
	punpcklqdq	16(%esp),%xmm1
	punpcklqdq	32(%esp),%xmm2
	punpcklqdq	48(%esp),%xmm3
	punpcklqdq	64(%esp),%xmm4
	jmp	.L005square
.L006square_break:
	/* interleave final-pass limbs with the saved limbs, one dword each */
	psllq	$32,%xmm0
	psllq	$32,%xmm1
	psllq	$32,%xmm2
	psllq	$32,%xmm3
	psllq	$32,%xmm4
	por	(%esp),%xmm0
	por	16(%esp),%xmm1
	por	32(%esp),%xmm2
	por	48(%esp),%xmm3
	por	64(%esp),%xmm4
	pshufd	$141,%xmm0,%xmm0
	pshufd	$141,%xmm1,%xmm1
	pshufd	$141,%xmm2,%xmm2
	pshufd	$141,%xmm3,%xmm3
	pshufd	$141,%xmm4,%xmm4
	/* rows 0..4 of the table (ctx+48 .. ctx+127) */
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	movdqu	%xmm4,64(%edi)
	/* rows 5..8: rows 1..4 times 5 (ctx+128 .. ctx+191) */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqu	%xmm6,80(%edi)
	movdqu	%xmm5,96(%edi)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqu	%xmm6,112(%edi)
	movdqu	%xmm5,128(%edi)
	movl	%ebp,%esp		/* drop scratch area */
	leal	-48(%edi),%edi		/* edi back to ctx */
	ret
.size	_poly1305_init_sse2,.-_poly1305_init_sse2

/*
 * _poly1305_blocks_sse2(ctx, inp, len, padbit) -- same stack arguments as
 * poly1305_blocks; installed in func[0] by poly1305_init.
 *
 * len is rounded down to a multiple of 16; zero returns immediately.
 * When len < 64 and the ctx flag word (ctx+20) is still 0 the call is
 * forwarded to the scalar loop at .Lenter_blocks.  Otherwise:
 *   - on the first vectorised call (flag == 0) _poly1305_init_sse2 fills
 *     the table at ctx+48, the accumulator is re-split from 32-bit words
 *     into five 26-bit limbs, and the flag is set to 1;
 *   - if the block count is odd, one block is processed on its own;
 *   - the remaining data is consumed 64 bytes per main-loop iteration,
 *     two blocks per xmm register, with 32/64-byte tails handled at
 *     .L014skip_loop / .L016long_tail;
 *   - the two lanes are summed at .L017short_tail, carries are
 *     propagated, and the five 26-bit limbs are stored to ctx+0..19.
 *
 * Registers in the vector section:
 *   %ebx = .Lconst_sse2, %ebp = saved %esp, %edi = ctx+48,
 *   %edx = 384(%esp) (table rows copied to -144..128(%edx)),
 *   %eax = 160(%esp) (scratch for the current input limbs),
 *   %xmm0-%xmm4 = accumulator limbs / product terms, %xmm7 = mask.
 * A 528-byte 16-aligned scratch area is carved below the caller's %esp.
 */
.align	32
.type	_poly1305_blocks_sse2,@function
.align	16
_poly1305_blocks_sse2:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		/* ctx */
	movl	24(%esp),%esi		/* inp */
	movl	28(%esp),%ecx		/* len */
	movl	20(%edi),%eax		/* representation flag */
	andl	$-16,%ecx		/* whole blocks only */
	jz	.L007nodata
	cmpl	$64,%ecx
	jae	.L008enter_sse2
	testl	%eax,%eax
	jz	.Lenter_blocks		/* short input, still scalar form */
.align	16
.L008enter_sse2:
	call	.L009pic_point
.L009pic_point:
	popl	%ebx
	leal	.Lconst_sse2-.L009pic_point(%ebx),%ebx
	testl	%eax,%eax
	jnz	.L010base2_26		/* accumulator already in 26-bit limbs */
	call	_poly1305_init_sse2	/* clobbers ecx, edx, ebp */
	/* convert accumulator: overlapping unaligned loads + shift + mask */
	movl	(%edi),%eax
	movl	3(%edi),%ecx
	movl	6(%edi),%edx
	movl	9(%edi),%esi
	movl	13(%edi),%ebp
	movl	$1,20(%edi)		/* mark ctx as 26-bit form */
	shrl	$2,%ecx
	andl	$67108863,%eax		/* 0x3ffffff */
	shrl	$4,%edx
	andl	$67108863,%ecx
	shrl	$6,%esi
	andl	$67108863,%edx
	movd	%eax,%xmm0
	movd	%ecx,%xmm1
	movd	%edx,%xmm2
	movd	%esi,%xmm3
	movd	%ebp,%xmm4
	movl	24(%esp),%esi		/* reload inp */
	movl	28(%esp),%ecx		/* reload len ... */
	andl	$-16,%ecx		/* ... and re-apply the block mask so both
					 * paths reach .L011base2_32 with the
					 * same rounded length */
	jmp	.L011base2_32
.align	16
.L010base2_26:
	movd	(%edi),%xmm0
	movd	4(%edi),%xmm1
	movd	8(%edi),%xmm2
	movd	12(%edi),%xmm3
	movd	16(%edi),%xmm4
	movdqa	64(%ebx),%xmm7		/* 26-bit mask */
.L011base2_32:
	movl	32(%esp),%eax		/* padbit */
	movl	%esp,%ebp
	subl	$528,%esp
	andl	$-16,%esp
	leal	48(%edi),%edi		/* edi -> table */
	shll	$24,%eax		/* padbit positioned for limb 4 */
	testl	$31,%ecx
	jz	.L012even		/* even number of blocks */
	/* ---- odd block count: absorb a single block first ---- */
	movdqu	(%esi),%xmm6
	leal	16(%esi),%esi
	movdqa	%xmm6,%xmm5
	pand	%xmm7,%xmm6
	paddd	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	psrlq	$26,%xmm5
	psrldq	$6,%xmm6
	pand	%xmm7,%xmm5
	paddd	%xmm5,%xmm1
	movdqa	%xmm6,%xmm5
	psrlq	$4,%xmm6
	pand	%xmm7,%xmm6
	paddd	%xmm6,%xmm2
	movdqa	%xmm5,%xmm6
	psrlq	$30,%xmm5
	pand	%xmm7,%xmm5
	psrldq	$7,%xmm6
	paddd	%xmm5,%xmm3
	movd	%eax,%xmm5
	paddd	%xmm6,%xmm4
	movd	12(%edi),%xmm6
	paddd	%xmm5,%xmm4		/* add padbit << 24 */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	/* multiply by the dword at offset 12 of each table row */
	pmuludq	%xmm6,%xmm0
	pmuludq	%xmm6,%xmm1
	pmuludq	%xmm6,%xmm2
	movd	28(%edi),%xmm5
	pmuludq	%xmm6,%xmm3
	pmuludq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	48(%esp),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movd	92(%edi),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%esp),%xmm6
	movd	44(%edi),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%esp),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%esp),%xmm5
	paddq	%xmm7,%xmm4
	movd	108(%edi),%xmm7
	pmuludq	(%esp),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%esp),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%esp),%xmm5
	movd	60(%edi),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%esp),%xmm6
	paddq	%xmm5,%xmm0
	movd	124(%edi),%xmm5
	pmuludq	(%esp),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%esp),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%esp),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%esp),%xmm7
	movd	76(%edi),%xmm5
	paddq	%xmm6,%xmm1
	movd	140(%edi),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%esp),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%esp),%xmm6
	movdqa	64(%ebx),%xmm7		/* reload 26-bit mask */
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* carry pass (same sequence as in _poly1305_init_sse2) */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	subl	$16,%ecx
	jz	.L013done		/* that was the only block */
.L012even:
	/* ---- even block count ---- */
	leal	384(%esp),%edx
	leal	-32(%esi),%eax
	subl	$64,%ecx		/* flags from here are consumed by cmovb
					 * and by jbe .L014skip_loop below; only
					 * lea/mov/SSE instructions intervene */
	/* expand each table row: low qword pair -> 0..128(%edx),
	 * high qword pair -> -144..-16(%edx) */
	movdqu	(%edi),%xmm5
	pshufd	$68,%xmm5,%xmm6
	cmovbl	%eax,%esi		/* fewer than 64 bytes: back inp up by 32 */
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,(%edx)
	leal	160(%esp),%eax
	movdqu	16(%edi),%xmm6
	movdqa	%xmm5,-144(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,16(%edx)
	movdqu	32(%edi),%xmm5
	movdqa	%xmm6,-128(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,32(%edx)
	movdqu	48(%edi),%xmm6
	movdqa	%xmm5,-112(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,48(%edx)
	movdqu	64(%edi),%xmm5
	movdqa	%xmm6,-96(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,64(%edx)
	movdqu	80(%edi),%xmm6
	movdqa	%xmm5,-80(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,80(%edx)
	movdqu	96(%edi),%xmm5
	movdqa	%xmm6,-64(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,96(%edx)
	movdqu	112(%edi),%xmm6
	movdqa	%xmm5,-48(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,112(%edx)
	movdqu	128(%edi),%xmm5
	movdqa	%xmm6,-32(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,128(%edx)
	movdqa	%xmm5,-16(%edx)
	/* load two input blocks and split them into 26-bit limb pairs */
	movdqu	32(%esi),%xmm5
	movdqu	48(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,112(%esp)		/* park accumulator limbs 2..4 */
	movdqa	%xmm3,128(%esp)
	movdqa	%xmm4,144(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4		/* OR in 1<<24 from the constant table */
	movdqa	%xmm0,80(%esp)		/* park accumulator limbs 0..1 */
	movdqa	%xmm1,96(%esp)
	jbe	.L014skip_loop		/* flags from "subl $64,%ecx" above */
	jmp	.L015loop
.align	32
.L015loop:
	/* ---- main loop: 64 bytes per iteration ---- */
	/* first half: blocks just loaded times rows at -144..-16(%edx) */
	movdqa	-144(%edx),%xmm7
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	movdqa	%xmm5,%xmm1
	pmuludq	%xmm7,%xmm5
	movdqa	%xmm6,%xmm0
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	pmuludq	%xmm7,%xmm3
	pmuludq	%xmm7,%xmm4
	pmuludq	-16(%edx),%xmm0
	movdqa	%xmm1,%xmm7
	pmuludq	-128(%edx),%xmm1
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	-112(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	%xmm5,%xmm6
	pmuludq	-96(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	16(%eax),%xmm7
	pmuludq	-80(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	-128(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	-112(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	32(%eax),%xmm7
	pmuludq	-96(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	-32(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	-16(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	-128(%edx),%xmm6
	paddq	%xmm5,%xmm1
	movdqa	48(%eax),%xmm5
	pmuludq	-112(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	-48(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	%xmm6,%xmm7
	pmuludq	-32(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	-16(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	64(%eax),%xmm6
	pmuludq	-128(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	%xmm6,%xmm7
	pmuludq	-16(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	-64(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	-48(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	64(%ebx),%xmm7		/* reload 26-bit mask */
	pmuludq	-32(%edx),%xmm6
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* load and split the other two blocks of this 64-byte group */
	movdqu	-32(%esi),%xmm5
	movdqu	-16(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	leal	-32(%esi),%eax
	subl	$64,%ecx		/* flags consumed by cmovb below and by
					 * "ja .L015loop" at the loop bottom */
	/* add the parked accumulator limbs to these blocks */
	paddd	80(%esp),%xmm5
	paddd	96(%esp),%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
	cmovbl	%eax,%esi		/* last partial group: back inp up by 32 */
	leal	160(%esp),%eax
	/* second half: (accumulator + blocks) times rows at 0..128(%edx) */
	movdqa	(%edx),%xmm7
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	movdqa	%xmm5,%xmm1
	pmuludq	%xmm7,%xmm5
	paddq	%xmm0,%xmm5
	movdqa	%xmm6,%xmm0
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	pmuludq	%xmm7,%xmm3
	pmuludq	%xmm7,%xmm4
	paddq	16(%esp),%xmm6
	paddq	32(%esp),%xmm2
	paddq	48(%esp),%xmm3
	paddq	64(%esp),%xmm4
	pmuludq	128(%edx),%xmm0
	movdqa	%xmm1,%xmm7
	pmuludq	16(%edx),%xmm1
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	16(%eax),%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	32(%eax),%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	112(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	128(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm1
	movdqa	48(%eax),%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	96(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	%xmm6,%xmm7
	pmuludq	112(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	128(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	64(%eax),%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	%xmm6,%xmm7
	pmuludq	128(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	80(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	96(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	64(%ebx),%xmm7		/* reload 26-bit mask */
	pmuludq	112(%edx),%xmm6
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* carry pass */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	/* preload and split the first two blocks of the next group */
	movdqu	32(%esi),%xmm5
	movdqu	48(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,112(%esp)
	movdqa	%xmm3,128(%esp)
	movdqa	%xmm4,144(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	movdqa	%xmm0,80(%esp)
	movdqa	%xmm1,96(%esp)
	ja	.L015loop		/* flags from "subl $64,%ecx" above */
.L014skip_loop:
	/* ---- tail: 32 or 64 bytes left ---- */
	pshufd	$16,-144(%edx),%xmm7
	addl	$32,%ecx		/* ZF here selects 32-byte vs 64-byte tail;
					 * it is tested twice: by jnz right below
					 * and by "jz .L017short_tail" further
					 * down (only SSE/mov code in between) */
	jnz	.L016long_tail
	/* 32-byte tail: fold the parked accumulator into this last pair */
	paddd	%xmm0,%xmm5
	paddd	%xmm1,%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
.L016long_tail:
	movdqa	%xmm5,(%eax)
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	pmuludq	%xmm7,%xmm5
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	movdqa	%xmm5,%xmm0
	pshufd	$16,-128(%edx),%xmm5
	pmuludq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm1
	pmuludq	%xmm7,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	48(%eax),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%eax),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%eax),%xmm7
	paddq	%xmm6,%xmm3
	pshufd	$16,-64(%edx),%xmm6
	pmuludq	(%eax),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%eax),%xmm6
	pshufd	$16,-112(%edx),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%eax),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%eax),%xmm5
	paddq	%xmm7,%xmm4
	pshufd	$16,-48(%edx),%xmm7
	pmuludq	(%eax),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%eax),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%eax),%xmm5
	pshufd	$16,-96(%edx),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%eax),%xmm6
	paddq	%xmm5,%xmm0
	pshufd	$16,-32(%edx),%xmm5
	pmuludq	(%eax),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%eax),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%eax),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%eax),%xmm7
	pshufd	$16,-80(%edx),%xmm5
	paddq	%xmm6,%xmm1
	pshufd	$16,-16(%edx),%xmm6
	pmuludq	(%eax),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%eax),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%eax),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%eax),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%eax),%xmm6
	movdqa	64(%ebx),%xmm7		/* reload 26-bit mask */
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	jz	.L017short_tail		/* ZF still from "addl $32,%ecx" */
	/* 64-byte tail: load, split and multiply the remaining pair */
	movdqu	-32(%esi),%xmm5
	movdqu	-16(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	pshufd	$16,(%edx),%xmm7
	paddd	80(%esp),%xmm5		/* add parked accumulator limbs */
	paddd	96(%esp),%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
	movdqa	%xmm5,(%esp)
	pmuludq	%xmm7,%xmm5
	movdqa	%xmm6,16(%esp)
	pmuludq	%xmm7,%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm2,%xmm5
	pmuludq	%xmm7,%xmm2
	paddq	%xmm6,%xmm1
	movdqa	%xmm3,%xmm6
	pmuludq	%xmm7,%xmm3
	paddq	32(%esp),%xmm2
	movdqa	%xmm5,32(%esp)
	pshufd	$16,16(%edx),%xmm5
	paddq	48(%esp),%xmm3
	movdqa	%xmm6,48(%esp)
	movdqa	%xmm4,%xmm6
	pmuludq	%xmm7,%xmm4
	paddq	64(%esp),%xmm4
	movdqa	%xmm6,64(%esp)
	movdqa	%xmm5,%xmm6
	pmuludq	48(%esp),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	pshufd	$16,80(%edx),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%esp),%xmm6
	pshufd	$16,32(%edx),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%esp),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%esp),%xmm5
	paddq	%xmm7,%xmm4
	pshufd	$16,96(%edx),%xmm7
	pmuludq	(%esp),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%esp),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%esp),%xmm5
	pshufd	$16,48(%edx),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%esp),%xmm6
	paddq	%xmm5,%xmm0
	pshufd	$16,112(%edx),%xmm5
	pmuludq	(%esp),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%esp),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%esp),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%esp),%xmm7
	pshufd	$16,64(%edx),%xmm5
	paddq	%xmm6,%xmm1
	pshufd	$16,128(%edx),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%esp),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%esp),%xmm6
	movdqa	64(%ebx),%xmm7		/* reload 26-bit mask */
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
.L017short_tail:
	/* horizontal add: fold the high qword lane into the low lane */
	pshufd	$78,%xmm4,%xmm6
	pshufd	$78,%xmm3,%xmm5
	paddq	%xmm6,%xmm4
	paddq	%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm6
	pshufd	$78,%xmm1,%xmm5
	paddq	%xmm6,%xmm0
	paddq	%xmm5,%xmm1
	pshufd	$78,%xmm2,%xmm6
	/* final carry pass (lane fold of limb 2 is interleaved) */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm6,%xmm2
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
.L013done:
	/* edi = ctx+48 here, so -48..-32(%edi) are accumulator words 0..4 */
	movd	%xmm0,-48(%edi)
	movd	%xmm1,-44(%edi)
	movd	%xmm2,-40(%edi)
	movd	%xmm3,-36(%edi)
	movd	%xmm4,-32(%edi)
	movl	%ebp,%esp		/* drop scratch area */
.L007nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_blocks_sse2,.-_poly1305_blocks_sse2

/*
 * _poly1305_emit_sse2(ctx, mac, nonce) -- same stack arguments as
 * poly1305_emit; installed in func[1] by poly1305_init.
 *
 * When the ctx flag word (ctx+20) is 0 the accumulator is still in
 * 32-bit-word form and control transfers to .Lenter_emit in
 * poly1305_emit.  Otherwise the five 26-bit limbs are recombined into
 * four 32-bit words plus a top word, the part of the top word above bit 2
 * is folded back in times 5, and the same "+5 / select / add nonce"
 * sequence as poly1305_emit is applied.  %xmm0-%xmm3 are used only as
 * temporaries for the four combined words.
 */
.align	32
.type	_poly1305_emit_sse2,@function
.align	16
_poly1305_emit_sse2:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp		/* ctx */
	cmpl	$0,20(%ebp)
	je	.Lenter_emit		/* 32-bit-word form: use scalar emit */
	movl	(%ebp),%eax		/* limbs 0..4 */
	movl	4(%ebp),%edi
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	/* pack 26-bit limbs into 32-bit words: limb i contributes
	 * (limb << (26*i mod 32)) to one word and the spill to the next */
	movl	%edi,%ebx
	shll	$26,%edi
	shrl	$6,%ebx
	addl	%edi,%eax
	movl	%ecx,%edi
	adcl	$0,%ebx
	shll	$20,%edi
	shrl	$12,%ecx
	addl	%edi,%ebx
	movl	%edx,%edi
	adcl	$0,%ecx
	shll	$14,%edi
	shrl	$18,%edx
	addl	%edi,%ecx
	movl	%esi,%edi
	adcl	$0,%edx
	shll	$8,%edi
	shrl	$24,%esi
	addl	%edi,%edx
	adcl	$0,%esi
	/* fold bits above bit 2 of the top word back in as *5 */
	movl	%esi,%edi
	andl	$3,%esi
	shrl	$2,%edi
	leal	(%edi,%edi,4),%ebp
	movl	24(%esp),%edi		/* mac */
	addl	%ebp,%eax
	movl	28(%esp),%ebp		/* nonce */
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	/* keep a copy of each word in xmm0..3 while computing value + 5 */
	movd	%eax,%xmm0
	addl	$5,%eax
	movd	%ebx,%xmm1
	adcl	$0,%ebx
	movd	%ecx,%xmm2
	adcl	$0,%ecx
	movd	%edx,%xmm3
	adcl	$0,%edx
	adcl	$0,%esi
	shrl	$2,%esi
	negl	%esi			/* mask from the top word after >>2, negated */
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)		/* park masked +5 value in mac */
	movd	%xmm0,%eax
	movl	%ebx,4(%edi)
	movd	%xmm1,%ebx
	movl	%ecx,8(%edi)
	movd	%xmm2,%ecx
	movl	%edx,12(%edi)
	movd	%xmm3,%edx
	notl	%esi			/* complementary mask */
	andl	%esi,%eax
	andl	%esi,%ebx
	orl	(%edi),%eax
	andl	%esi,%ecx
	orl	4(%edi),%ebx
	andl	%esi,%edx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx
	addl	(%ebp),%eax		/* + nonce */
	adcl	4(%ebp),%ebx
	movl	%eax,(%edi)
	adcl	8(%ebp),%ecx
	movl	%ebx,4(%edi)
	adcl	12(%ebp),%edx
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_emit_sse2,.-_poly1305_emit_sse2

/*
 * Constant table addressed through %ebx by the SSE2 routines:
 *    0(%ebx)   four qwords of 1<<24  (OR-ed into limb 4 of input blocks)
 *   32(%ebx)   32 zero bytes
 *   64(%ebx)   four qwords of 0x3ffffff  (26-bit limb mask)
 *   96(%ebx)   0x0fffffff, 0x0ffffffc x3  (not referenced in this file)
 * followed by the NUL-terminated ASCII string
 *   "Poly1305 for x86, CRYPTOGAMS by <appro@openssl.org>".
 */
.align	64
.Lconst_sse2:
.long	16777216,0,16777216,0,16777216,0,16777216,0
.long	0,0,0,0,0,0,0,0
.long	67108863,0,67108863,0,67108863,0,67108863,0
.long	268435455,268435452,268435452,268435452
.byte	80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.align	4
.comm	OPENSSL_ia32cap_P,16,4