1238405Sjkim # $FreeBSD: releng/10.2/secure/lib/libcrypto/i386/x86-mont.s 238405 2012-07-12 19:30:53Z jkim $ 2238405Sjkim.file "x86-mont.s" 3238405Sjkim.text 4238405Sjkim.globl bn_mul_mont 5238405Sjkim.type bn_mul_mont,@function 6238405Sjkim.align 16 7238405Sjkimbn_mul_mont: 8238405Sjkim.L_bn_mul_mont_begin: 9238405Sjkim pushl %ebp 10238405Sjkim pushl %ebx 11238405Sjkim pushl %esi 12238405Sjkim pushl %edi 13238405Sjkim xorl %eax,%eax 14238405Sjkim movl 40(%esp),%edi 15238405Sjkim cmpl $4,%edi 16238405Sjkim jl .L000just_leave 17238405Sjkim leal 20(%esp),%esi 18238405Sjkim leal 24(%esp),%edx 19238405Sjkim movl %esp,%ebp 20238405Sjkim addl $2,%edi 21238405Sjkim negl %edi 22238405Sjkim leal -32(%esp,%edi,4),%esp 23238405Sjkim negl %edi 24238405Sjkim movl %esp,%eax 25238405Sjkim subl %edx,%eax 26238405Sjkim andl $2047,%eax 27238405Sjkim subl %eax,%esp 28238405Sjkim xorl %esp,%edx 29238405Sjkim andl $2048,%edx 30238405Sjkim xorl $2048,%edx 31238405Sjkim subl %edx,%esp 32238405Sjkim andl $-64,%esp 33238405Sjkim movl (%esi),%eax 34238405Sjkim movl 4(%esi),%ebx 35238405Sjkim movl 8(%esi),%ecx 36238405Sjkim movl 12(%esi),%edx 37238405Sjkim movl 16(%esi),%esi 38238405Sjkim movl (%esi),%esi 39238405Sjkim movl %eax,4(%esp) 40238405Sjkim movl %ebx,8(%esp) 41238405Sjkim movl %ecx,12(%esp) 42238405Sjkim movl %edx,16(%esp) 43238405Sjkim movl %esi,20(%esp) 44238405Sjkim leal -3(%edi),%ebx 45238405Sjkim movl %ebp,24(%esp) 46238405Sjkim leal OPENSSL_ia32cap_P,%eax 47238405Sjkim btl $26,(%eax) 48238405Sjkim jnc .L001non_sse2 49238405Sjkim movl $-1,%eax 50238405Sjkim movd %eax,%mm7 51238405Sjkim movl 8(%esp),%esi 52238405Sjkim movl 12(%esp),%edi 53238405Sjkim movl 16(%esp),%ebp 54238405Sjkim xorl %edx,%edx 55238405Sjkim xorl %ecx,%ecx 56238405Sjkim movd (%edi),%mm4 57238405Sjkim movd (%esi),%mm5 58238405Sjkim movd (%ebp),%mm3 59238405Sjkim pmuludq %mm4,%mm5 60238405Sjkim movq %mm5,%mm2 61238405Sjkim movq %mm5,%mm0 62238405Sjkim pand %mm7,%mm0 63238405Sjkim pmuludq 20(%esp),%mm5 64238405Sjkim pmuludq %mm5,%mm3 65238405Sjkim paddq %mm0,%mm3 66238405Sjkim movd 4(%ebp),%mm1 67238405Sjkim movd 4(%esi),%mm0 68238405Sjkim psrlq $32,%mm2 69238405Sjkim psrlq $32,%mm3 70238405Sjkim incl %ecx 71238405Sjkim.align 16 72238405Sjkim.L0021st: 73238405Sjkim pmuludq %mm4,%mm0 74238405Sjkim pmuludq %mm5,%mm1 75238405Sjkim paddq %mm0,%mm2 76238405Sjkim paddq %mm1,%mm3 77238405Sjkim movq %mm2,%mm0 78238405Sjkim pand %mm7,%mm0 79238405Sjkim movd 4(%ebp,%ecx,4),%mm1 80238405Sjkim paddq %mm0,%mm3 81238405Sjkim movd 4(%esi,%ecx,4),%mm0 82238405Sjkim psrlq $32,%mm2 83238405Sjkim movd %mm3,28(%esp,%ecx,4) 84238405Sjkim psrlq $32,%mm3 85238405Sjkim leal 1(%ecx),%ecx 86238405Sjkim cmpl %ebx,%ecx 87238405Sjkim jl .L0021st 88238405Sjkim pmuludq %mm4,%mm0 89238405Sjkim pmuludq %mm5,%mm1 90238405Sjkim paddq %mm0,%mm2 91238405Sjkim paddq %mm1,%mm3 92238405Sjkim movq %mm2,%mm0 93238405Sjkim pand %mm7,%mm0 94238405Sjkim paddq %mm0,%mm3 95238405Sjkim movd %mm3,28(%esp,%ecx,4) 96238405Sjkim psrlq $32,%mm2 97238405Sjkim psrlq $32,%mm3 98238405Sjkim paddq %mm2,%mm3 99238405Sjkim movq %mm3,32(%esp,%ebx,4) 100238405Sjkim incl %edx 101238405Sjkim.L003outer: 102238405Sjkim xorl %ecx,%ecx 103238405Sjkim movd (%edi,%edx,4),%mm4 104238405Sjkim movd (%esi),%mm5 105238405Sjkim movd 32(%esp),%mm6 106238405Sjkim movd (%ebp),%mm3 107238405Sjkim pmuludq %mm4,%mm5 108238405Sjkim paddq %mm6,%mm5 109238405Sjkim movq %mm5,%mm0 110238405Sjkim movq %mm5,%mm2 111238405Sjkim pand %mm7,%mm0 112238405Sjkim pmuludq 20(%esp),%mm5 113238405Sjkim pmuludq %mm5,%mm3 114238405Sjkim paddq %mm0,%mm3 115238405Sjkim movd 36(%esp),%mm6 116238405Sjkim movd 4(%ebp),%mm1 117238405Sjkim movd 4(%esi),%mm0 118238405Sjkim psrlq $32,%mm2 119238405Sjkim psrlq $32,%mm3 120238405Sjkim paddq %mm6,%mm2 121238405Sjkim incl %ecx 122238405Sjkim decl %ebx 123238405Sjkim.L004inner: 124238405Sjkim pmuludq %mm4,%mm0 125238405Sjkim pmuludq %mm5,%mm1 126238405Sjkim paddq %mm0,%mm2 127238405Sjkim paddq %mm1,%mm3 128238405Sjkim movq %mm2,%mm0 129238405Sjkim movd 36(%esp,%ecx,4),%mm6 130238405Sjkim pand %mm7,%mm0 131238405Sjkim movd 4(%ebp,%ecx,4),%mm1 132238405Sjkim paddq %mm0,%mm3 133238405Sjkim movd 4(%esi,%ecx,4),%mm0 134238405Sjkim psrlq $32,%mm2 135238405Sjkim movd %mm3,28(%esp,%ecx,4) 136238405Sjkim psrlq $32,%mm3 137238405Sjkim paddq %mm6,%mm2 138238405Sjkim decl %ebx 139238405Sjkim leal 1(%ecx),%ecx 140238405Sjkim jnz .L004inner 141238405Sjkim movl %ecx,%ebx 142238405Sjkim pmuludq %mm4,%mm0 143238405Sjkim pmuludq %mm5,%mm1 144238405Sjkim paddq %mm0,%mm2 145238405Sjkim paddq %mm1,%mm3 146238405Sjkim movq %mm2,%mm0 147238405Sjkim pand %mm7,%mm0 148238405Sjkim paddq %mm0,%mm3 149238405Sjkim movd %mm3,28(%esp,%ecx,4) 150238405Sjkim psrlq $32,%mm2 151238405Sjkim psrlq $32,%mm3 152238405Sjkim movd 36(%esp,%ebx,4),%mm6 153238405Sjkim paddq %mm2,%mm3 154238405Sjkim paddq %mm6,%mm3 155238405Sjkim movq %mm3,32(%esp,%ebx,4) 156238405Sjkim leal 1(%edx),%edx 157238405Sjkim cmpl %ebx,%edx 158238405Sjkim jle .L003outer 159238405Sjkim emms 160238405Sjkim jmp .L005common_tail 161238405Sjkim.align 16 162238405Sjkim.L001non_sse2: 163238405Sjkim movl 8(%esp),%esi 164238405Sjkim leal 1(%ebx),%ebp 165238405Sjkim movl 12(%esp),%edi 166238405Sjkim xorl %ecx,%ecx 167238405Sjkim movl %esi,%edx 168238405Sjkim andl $1,%ebp 169238405Sjkim subl %edi,%edx 170238405Sjkim leal 4(%edi,%ebx,4),%eax 171238405Sjkim orl %edx,%ebp 172238405Sjkim movl (%edi),%edi 173238405Sjkim jz .L006bn_sqr_mont 174238405Sjkim movl %eax,28(%esp) 175238405Sjkim movl (%esi),%eax 176238405Sjkim xorl %edx,%edx 177238405Sjkim.align 16 178238405Sjkim.L007mull: 179238405Sjkim movl %edx,%ebp 180238405Sjkim mull %edi 181238405Sjkim addl %eax,%ebp 182238405Sjkim leal 1(%ecx),%ecx 183238405Sjkim adcl $0,%edx 184238405Sjkim movl (%esi,%ecx,4),%eax 185238405Sjkim cmpl %ebx,%ecx 186238405Sjkim movl %ebp,28(%esp,%ecx,4) 187238405Sjkim jl .L007mull 188238405Sjkim movl %edx,%ebp 189238405Sjkim mull %edi 190238405Sjkim movl 20(%esp),%edi 191238405Sjkim addl %ebp,%eax 192238405Sjkim movl 16(%esp),%esi 193238405Sjkim adcl $0,%edx 194238405Sjkim imull 32(%esp),%edi 195238405Sjkim movl %eax,32(%esp,%ebx,4) 196238405Sjkim xorl %ecx,%ecx 197238405Sjkim movl %edx,36(%esp,%ebx,4) 198238405Sjkim movl %ecx,40(%esp,%ebx,4) 199238405Sjkim movl (%esi),%eax 200238405Sjkim mull %edi 201238405Sjkim addl 32(%esp),%eax 202238405Sjkim movl 4(%esi),%eax 203238405Sjkim adcl $0,%edx 204238405Sjkim incl %ecx 205238405Sjkim jmp .L0082ndmadd 206238405Sjkim.align 16 207238405Sjkim.L0091stmadd: 208238405Sjkim movl %edx,%ebp 209238405Sjkim mull %edi 210238405Sjkim addl 32(%esp,%ecx,4),%ebp 211238405Sjkim leal 1(%ecx),%ecx 212238405Sjkim adcl $0,%edx 213238405Sjkim addl %eax,%ebp 214238405Sjkim movl (%esi,%ecx,4),%eax 215238405Sjkim adcl $0,%edx 216238405Sjkim cmpl %ebx,%ecx 217238405Sjkim movl %ebp,28(%esp,%ecx,4) 218238405Sjkim jl .L0091stmadd 219238405Sjkim movl %edx,%ebp 220238405Sjkim mull %edi 221238405Sjkim addl 32(%esp,%ebx,4),%eax 222238405Sjkim movl 20(%esp),%edi 223238405Sjkim adcl $0,%edx 224238405Sjkim movl 16(%esp),%esi 225238405Sjkim addl %eax,%ebp 226238405Sjkim adcl $0,%edx 227238405Sjkim imull 32(%esp),%edi 228238405Sjkim xorl %ecx,%ecx 229238405Sjkim addl 36(%esp,%ebx,4),%edx 230238405Sjkim movl %ebp,32(%esp,%ebx,4) 231238405Sjkim adcl $0,%ecx 232238405Sjkim movl (%esi),%eax 233238405Sjkim movl %edx,36(%esp,%ebx,4) 234238405Sjkim movl %ecx,40(%esp,%ebx,4) 235238405Sjkim mull %edi 236238405Sjkim addl 32(%esp),%eax 237238405Sjkim movl 4(%esi),%eax 238238405Sjkim adcl $0,%edx 239238405Sjkim movl $1,%ecx 240238405Sjkim.align 16 241238405Sjkim.L0082ndmadd: 242238405Sjkim movl %edx,%ebp 243238405Sjkim mull %edi 244238405Sjkim addl 32(%esp,%ecx,4),%ebp 245238405Sjkim leal 1(%ecx),%ecx 246238405Sjkim adcl $0,%edx 247238405Sjkim addl %eax,%ebp 248238405Sjkim movl (%esi,%ecx,4),%eax 249238405Sjkim adcl $0,%edx 250238405Sjkim cmpl %ebx,%ecx 251238405Sjkim movl %ebp,24(%esp,%ecx,4) 252238405Sjkim jl .L0082ndmadd 253238405Sjkim movl %edx,%ebp 254238405Sjkim mull %edi 255238405Sjkim addl 32(%esp,%ebx,4),%ebp 256238405Sjkim adcl $0,%edx 257238405Sjkim addl %eax,%ebp 258238405Sjkim adcl $0,%edx 259238405Sjkim movl %ebp,28(%esp,%ebx,4) 260238405Sjkim xorl %eax,%eax 261238405Sjkim movl 12(%esp),%ecx 262238405Sjkim addl 36(%esp,%ebx,4),%edx 263238405Sjkim adcl 40(%esp,%ebx,4),%eax 264238405Sjkim leal 4(%ecx),%ecx 265238405Sjkim movl %edx,32(%esp,%ebx,4) 266238405Sjkim cmpl 28(%esp),%ecx 267238405Sjkim movl %eax,36(%esp,%ebx,4) 268238405Sjkim je .L005common_tail 269238405Sjkim movl (%ecx),%edi 270238405Sjkim movl 8(%esp),%esi 271238405Sjkim movl %ecx,12(%esp) 272238405Sjkim xorl %ecx,%ecx 273238405Sjkim xorl %edx,%edx 274238405Sjkim movl (%esi),%eax 275238405Sjkim jmp .L0091stmadd 276238405Sjkim.align 16 277238405Sjkim.L006bn_sqr_mont: 278238405Sjkim movl %ebx,(%esp) 279238405Sjkim movl %ecx,12(%esp) 280238405Sjkim movl %edi,%eax 281238405Sjkim mull %edi 282238405Sjkim movl %eax,32(%esp) 283238405Sjkim movl %edx,%ebx 284238405Sjkim shrl $1,%edx 285238405Sjkim andl $1,%ebx 286238405Sjkim incl %ecx 287238405Sjkim.align 16 288238405Sjkim.L010sqr: 289238405Sjkim movl (%esi,%ecx,4),%eax 290238405Sjkim movl %edx,%ebp 291238405Sjkim mull %edi 292238405Sjkim addl %ebp,%eax 293238405Sjkim leal 1(%ecx),%ecx 294238405Sjkim adcl $0,%edx 295238405Sjkim leal (%ebx,%eax,2),%ebp 296238405Sjkim shrl $31,%eax 297238405Sjkim cmpl (%esp),%ecx 298238405Sjkim movl %eax,%ebx 299238405Sjkim movl %ebp,28(%esp,%ecx,4) 300238405Sjkim jl .L010sqr 301238405Sjkim movl (%esi,%ecx,4),%eax 302238405Sjkim movl %edx,%ebp 303238405Sjkim mull %edi 304238405Sjkim addl %ebp,%eax 305238405Sjkim movl 20(%esp),%edi 306238405Sjkim adcl $0,%edx 307238405Sjkim movl 16(%esp),%esi 308238405Sjkim leal (%ebx,%eax,2),%ebp 309238405Sjkim imull 32(%esp),%edi 310238405Sjkim shrl $31,%eax 311238405Sjkim movl %ebp,32(%esp,%ecx,4) 312238405Sjkim leal (%eax,%edx,2),%ebp 313238405Sjkim movl (%esi),%eax 314238405Sjkim shrl $31,%edx 315238405Sjkim movl %ebp,36(%esp,%ecx,4) 316238405Sjkim movl %edx,40(%esp,%ecx,4) 317238405Sjkim mull %edi 318238405Sjkim addl 32(%esp),%eax 319238405Sjkim movl %ecx,%ebx 320238405Sjkim adcl $0,%edx 321238405Sjkim movl 4(%esi),%eax 322238405Sjkim movl $1,%ecx 323238405Sjkim.align 16 324238405Sjkim.L0113rdmadd: 325238405Sjkim movl %edx,%ebp 326238405Sjkim mull %edi 327238405Sjkim addl 32(%esp,%ecx,4),%ebp 328238405Sjkim adcl $0,%edx 329238405Sjkim addl %eax,%ebp 330238405Sjkim movl 4(%esi,%ecx,4),%eax 331238405Sjkim adcl $0,%edx 332238405Sjkim movl %ebp,28(%esp,%ecx,4) 333238405Sjkim movl %edx,%ebp 334238405Sjkim mull %edi 335238405Sjkim addl 36(%esp,%ecx,4),%ebp 336238405Sjkim leal 2(%ecx),%ecx 337238405Sjkim adcl $0,%edx 338238405Sjkim addl %eax,%ebp 339238405Sjkim movl (%esi,%ecx,4),%eax 340238405Sjkim adcl $0,%edx 341238405Sjkim cmpl %ebx,%ecx 342238405Sjkim movl %ebp,24(%esp,%ecx,4) 343238405Sjkim jl .L0113rdmadd 344238405Sjkim movl %edx,%ebp 345238405Sjkim mull %edi 346238405Sjkim addl 32(%esp,%ebx,4),%ebp 347238405Sjkim adcl $0,%edx 348238405Sjkim addl %eax,%ebp 349238405Sjkim adcl $0,%edx 350238405Sjkim movl %ebp,28(%esp,%ebx,4) 351238405Sjkim movl 12(%esp),%ecx 352238405Sjkim xorl %eax,%eax 353238405Sjkim movl 8(%esp),%esi 354238405Sjkim addl 36(%esp,%ebx,4),%edx 355238405Sjkim adcl 40(%esp,%ebx,4),%eax 356238405Sjkim movl %edx,32(%esp,%ebx,4) 357238405Sjkim cmpl %ebx,%ecx 358238405Sjkim movl %eax,36(%esp,%ebx,4) 359238405Sjkim je .L005common_tail 360238405Sjkim movl 4(%esi,%ecx,4),%edi 361238405Sjkim leal 1(%ecx),%ecx 362238405Sjkim movl %edi,%eax 363238405Sjkim movl %ecx,12(%esp) 364238405Sjkim mull %edi 365238405Sjkim addl 32(%esp,%ecx,4),%eax 366238405Sjkim adcl $0,%edx 367238405Sjkim movl %eax,32(%esp,%ecx,4) 368238405Sjkim xorl %ebp,%ebp 369238405Sjkim cmpl %ebx,%ecx 370238405Sjkim leal 1(%ecx),%ecx 371238405Sjkim je .L012sqrlast 372238405Sjkim movl %edx,%ebx 373238405Sjkim shrl $1,%edx 374238405Sjkim andl $1,%ebx 375238405Sjkim.align 16 376238405Sjkim.L013sqradd: 377238405Sjkim movl (%esi,%ecx,4),%eax 378238405Sjkim movl %edx,%ebp 379238405Sjkim mull %edi 380238405Sjkim addl %ebp,%eax 381238405Sjkim leal (%eax,%eax,1),%ebp 382238405Sjkim adcl $0,%edx 383238405Sjkim shrl $31,%eax 384238405Sjkim addl 32(%esp,%ecx,4),%ebp 385238405Sjkim leal 1(%ecx),%ecx 386238405Sjkim adcl $0,%eax 387238405Sjkim addl %ebx,%ebp 388238405Sjkim adcl $0,%eax 389238405Sjkim cmpl (%esp),%ecx 390238405Sjkim movl %ebp,28(%esp,%ecx,4) 391238405Sjkim movl %eax,%ebx 392238405Sjkim jle .L013sqradd 393238405Sjkim movl %edx,%ebp 394238405Sjkim addl %edx,%edx 395238405Sjkim shrl $31,%ebp 396238405Sjkim addl %ebx,%edx 397238405Sjkim adcl $0,%ebp 398238405Sjkim.L012sqrlast: 399238405Sjkim movl 20(%esp),%edi 400238405Sjkim movl 16(%esp),%esi 401238405Sjkim imull 32(%esp),%edi 402238405Sjkim addl 32(%esp,%ecx,4),%edx 403238405Sjkim movl (%esi),%eax 404238405Sjkim adcl $0,%ebp 405238405Sjkim movl %edx,32(%esp,%ecx,4) 406238405Sjkim movl %ebp,36(%esp,%ecx,4) 407238405Sjkim mull %edi 408238405Sjkim addl 32(%esp),%eax 409238405Sjkim leal -1(%ecx),%ebx 410238405Sjkim adcl $0,%edx 411238405Sjkim movl $1,%ecx 412238405Sjkim movl 4(%esi),%eax 413238405Sjkim jmp .L0113rdmadd 414238405Sjkim.align 16 415238405Sjkim.L005common_tail: 416238405Sjkim movl 16(%esp),%ebp 417238405Sjkim movl 4(%esp),%edi 418238405Sjkim leal 32(%esp),%esi 419238405Sjkim movl (%esi),%eax 420238405Sjkim movl %ebx,%ecx 421238405Sjkim xorl %edx,%edx 422238405Sjkim.align 16 423238405Sjkim.L014sub: 424238405Sjkim sbbl (%ebp,%edx,4),%eax 425238405Sjkim movl %eax,(%edi,%edx,4) 426238405Sjkim decl %ecx 427238405Sjkim movl 4(%esi,%edx,4),%eax 428238405Sjkim leal 1(%edx),%edx 429238405Sjkim jge .L014sub 430238405Sjkim sbbl $0,%eax 431238405Sjkim andl %eax,%esi 432238405Sjkim notl %eax 433238405Sjkim movl %edi,%ebp 434238405Sjkim andl %eax,%ebp 435238405Sjkim orl %ebp,%esi 436238405Sjkim.align 16 437238405Sjkim.L015copy: 438238405Sjkim movl (%esi,%ebx,4),%eax 439238405Sjkim movl %eax,(%edi,%ebx,4) 440238405Sjkim movl %ecx,32(%esp,%ebx,4) 441238405Sjkim decl %ebx 442238405Sjkim jge .L015copy 443238405Sjkim movl 24(%esp),%esp 444238405Sjkim movl $1,%eax 445238405Sjkim.L000just_leave: 446238405Sjkim popl %edi 447238405Sjkim popl %esi 448238405Sjkim popl %ebx 449238405Sjkim popl %ebp 450238405Sjkim ret 451238405Sjkim.size bn_mul_mont,.-.L_bn_mul_mont_begin 452238405Sjkim.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 453238405Sjkim.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 454238405Sjkim.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 455238405Sjkim.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 456238405Sjkim.byte 111,114,103,62,0 457238405Sjkim.comm OPENSSL_ia32cap_P,8,4 458