#!/usr/local/bin/perl
# bn-586.pl revision 127128
#
# Generator script: uses the helpers from x86asm.pl (asm_init, function_begin,
# mov, mul, label, ...) to emit x86 assembler for a set of word-array bignum
# routines.  Each bn_* sub below takes the name of the routine to emit and
# issues the instruction stream for that routine.
#
# Common structure of the array routines: the word count is masked down to a
# multiple of 8 and handled by a loop that is unrolled 8 words per iteration;
# the remaining (count & 7) words, at most 7, are handled by an unrolled tail
# that exits as soon as the remaining count reaches zero.

push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],$0);

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
#&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

# bn_mul_add_words($name)
#
# Emits a routine that reads four word parameters: r (param 0), a (param 1),
# num (param 2) and w (param 3).  For each of the num words it computes
# a[i]*w + r[i] + c, stores the low 32 bits back to r[i] and keeps the high
# 32 bits as the running carry c.  The final carry is moved to eax.
sub bn_mul_add_words
    {
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $Low="eax";
    $High="edx";
    $a="ebx";
    $w="ebp";
    $r="edi";
    $c="esi";

    &xor($c,$c);                        # clear carry
    &mov($r,&wparam(0));                # r

    &mov("ecx",&wparam(2));             # num
    &mov($a,&wparam(1));                # a

    &and("ecx",0xfffffff8);             # num rounded down to a multiple of 8
    &mov($w,&wparam(3));                # w

    &push("ecx");                       # Up the stack for a tmp variable

    # Branches on the flags set by the 'and' above.
    &jz(&label("maw_finish"));

    &set_label("maw_loop",0);

    &mov(&swtmp(0),"ecx");              # save remaining count in the tmp slot

    for ($i=0; $i<32; $i+=4)
        {
        &comment("Round $i");

        &mov("eax",&DWP($i,$a,"",0));   # *a
        &mul($w);                       # *a * w
        &add("eax",$c);                 # L(t)+=c
        &mov($c,&DWP($i,$r,"",0));      # fetch *r (reuses the carry register)
        &adc("edx",0);                  # H(t)+=carry
        &add("eax",$c);                 # L(t)+= *r
        &adc("edx",0);                  # H(t)+=carry
        &mov(&DWP($i,$r,"",0),"eax");   # *r= L(t);
        &mov($c,"edx");                 # c= H(t);
        }

    &comment("");
    &mov("ecx",&swtmp(0));              # reload remaining count
    &add($a,32);
    &add($r,32);
    &sub("ecx",8);
    &jnz(&label("maw_loop"));

    &set_label("maw_finish",0);
    &mov("ecx",&wparam(2));             # get num
    &and("ecx",7);
    &jnz(&label("maw_finish2"));        # helps branch prediction
    &jmp(&label("maw_end"));

    &set_label("maw_finish2",1);
    for ($i=0; $i<7; $i++)
        {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a,"",0)); # *a
        &mul($w);                       # *a * w
        &add("eax",$c);                 # L(t)+=c
        &mov($c,&DWP($i*4,$r,"",0));    # fetch *r
        &adc("edx",0);                  # H(t)+=carry
        &add("eax",$c);                 # L(t)+= *r
        &adc("edx",0);                  # H(t)+=carry
        # The dec sets the flags tested by the jz below; the two movs in
        # between do not modify flags.
        &dec("ecx") if ($i != 7-1);
        &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
        &mov($c,"edx");                 # c= H(t);
        &jz(&label("maw_end")) if ($i != 7-1);
        }
    &set_label("maw_end",0);
    &mov("eax",$c);

    &pop("ecx");                        # remove the tmp variable from the stack

    &function_end($name);
    }

# bn_mul_words($name)
#
# Emits a routine that reads r (param 0), a (param 1), num (param 2) and
# w (param 3).  For each of the num words it computes a[i]*w + c, stores the
# low 32 bits to r[i] and keeps the high 32 bits as the running carry c.
# The final carry is moved to eax.
sub bn_mul_words
    {
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $Low="eax";
    $High="edx";
    $a="ebx";
    $w="ecx";
    $r="edi";
    $c="esi";
    $num="ebp";

    &xor($c,$c);                        # clear carry
    &mov($r,&wparam(0));                # r
    &mov($a,&wparam(1));                # a
    &mov($num,&wparam(2));              # num
    &mov($w,&wparam(3));                # w

    &and($num,0xfffffff8);              # num rounded down to a multiple of 8
    &jz(&label("mw_finish"));

    &set_label("mw_loop",0);
    for ($i=0; $i<32; $i+=4)
        {
        &comment("Round $i");

        &mov("eax",&DWP($i,$a,"",0));   # *a
        &mul($w);                       # *a * w
        &add("eax",$c);                 # L(t)+=c
        # XXX

        &adc("edx",0);                  # H(t)+=carry
        &mov(&DWP($i,$r,"",0),"eax");   # *r= L(t);

        &mov($c,"edx");                 # c= H(t);
        }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jz(&label("mw_finish"));
    &jmp(&label("mw_loop"));

    &set_label("mw_finish",0);
    &mov($num,&wparam(2));              # get num
    &and($num,7);
    &jnz(&label("mw_finish2"));
    &jmp(&label("mw_end"));

    &set_label("mw_finish2",1);
    for ($i=0; $i<7; $i++)
        {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a,"",0)); # *a
        &mul($w);                       # *a * w
        &add("eax",$c);                 # L(t)+=c
        # XXX
        &adc("edx",0);                  # H(t)+=carry
        &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
        &mov($c,"edx");                 # c= H(t);
        &dec($num) if ($i != 7-1);
        &jz(&label("mw_end")) if ($i != 7-1);
        }
    &set_label("mw_end",0);
    &mov("eax",$c);

    &function_end($name);
    }

# bn_sqr_words($name)
#
# Emits a routine that reads r (param 0), a (param 1) and num (param 2).
# Each input word a[i] is multiplied by itself and the 64-bit product is
# stored as two words: low half at r[2*i], high half at r[2*i+1].
sub bn_sqr_words
    {
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $r="esi";
    $a="edi";
    $num="ebx";

    &mov($r,&wparam(0));                # r
    &mov($a,&wparam(1));                # a
    &mov($num,&wparam(2));              # num

    &and($num,0xfffffff8);              # num rounded down to a multiple of 8
    &jz(&label("sw_finish"));

    &set_label("sw_loop",0);
    for ($i=0; $i<32; $i+=4)
        {
        &comment("Round $i");
        &mov("eax",&DWP($i,$a,"",0));   # *a
        # XXX
        &mul("eax");                    # *a * *a
        &mov(&DWP($i*2,$r,"",0),"eax"); # low word of the square
        &mov(&DWP($i*2+4,$r,"",0),"edx");# high word of the square
        }

    &comment("");
    &add($a,32);
    &add($r,64);                        # output advances twice as fast as input
    &sub($num,8);
    &jnz(&label("sw_loop"));

    &set_label("sw_finish",0);
    &mov($num,&wparam(2));              # get num
    &and($num,7);
    &jz(&label("sw_end"));

    for ($i=0; $i<7; $i++)
        {
        &comment("Tail Round $i");
        &mov("eax",&DWP($i*4,$a,"",0)); # *a
        # XXX
        &mul("eax");                    # *a * *a
        &mov(&DWP($i*8,$r,"",0),"eax"); # low word of the square
        # The mov between dec and jz does not modify flags.
        &dec($num) if ($i != 7-1);
        &mov(&DWP($i*8+4,$r,"",0),"edx");
        &jz(&label("sw_end")) if ($i != 7-1);
        }
    &set_label("sw_end",0);

    &function_end($name);
    }

# bn_div_words($name)
#
# Emits a routine that loads param 0 into edx, param 1 into eax and param 2
# into ebx, then executes a single 'div ebx' (edx:eax divided by ebx).
sub bn_div_words
    {
    local($name)=@_;

    &function_begin($name,"");
    &mov("edx",&wparam(0));             # high word of the dividend
    &mov("eax",&wparam(1));             # low word of the dividend
    &mov("ebx",&wparam(2));             # divisor
    &div("ebx");
    &function_end($name);
    }

# bn_add_words($name)
#
# Emits a routine that reads r (param 0), a (param 1), b (param 2) and
# num (param 3).  For each of the num words it stores a[i] + b[i] + c to r[i]
# and recomputes the carry c.  c lives in eax, so the final carry is already
# in eax when the routine ends.
sub bn_add_words
    {
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $a="esi";
    $b="edi";
    $c="eax";
    $r="ebx";
    $tmp1="ecx";
    $tmp2="edx";
    $num="ebp";

    &mov($r,&wparam(0));                # get r
    &mov($a,&wparam(1));                # get a
    &mov($b,&wparam(2));                # get b
    &mov($num,&wparam(3));              # get num
    &xor($c,$c);                        # clear carry
    &and($num,0xfffffff8);              # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    for ($i=0; $i<8; $i++)
        {
        &comment("Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
        &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
        &add($tmp1,$c);                 # *a + carry in
        &mov($c,0);                     # mov leaves the flags untouched
        &adc($c,$c);                    # c = carry out of the first add
        &add($tmp1,$tmp2);              # + *b
        &adc($c,0);                     # c += carry out of the second add
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));              # get num
    &and($num,7);
    &jz(&label("aw_end"));

    for ($i=0; $i<7; $i++)
        {
        &comment("Tail Round $i");
        &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
        &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
        &add($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &add($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if ($i != 6);
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        &jz(&label("aw_end")) if ($i != 6);
        }
    &set_label("aw_end",0);

#   &mov("eax",$c);     # $c is "eax"

    &function_end($name);
    }

# bn_sub_words($name)
#
# Emits a routine that reads r (param 0), a (param 1), b (param 2) and
# num (param 3).  For each of the num words it stores a[i] - b[i] - c to r[i]
# and recomputes the borrow c.  c lives in eax, so the final borrow is
# already in eax when the routine ends.
sub bn_sub_words
    {
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $a="esi";
    $b="edi";
    $c="eax";
    $r="ebx";
    $tmp1="ecx";
    $tmp2="edx";
    $num="ebp";

    &mov($r,&wparam(0));                # get r
    &mov($a,&wparam(1));                # get a
    &mov($b,&wparam(2));                # get b
    &mov($num,&wparam(3));              # get num
    &xor($c,$c);                        # clear carry
    &and($num,0xfffffff8);              # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    for ($i=0; $i<8; $i++)
        {
        &comment("Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
        &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
        &sub($tmp1,$c);                 # *a - borrow in
        &mov($c,0);                     # mov leaves the flags untouched
        &adc($c,$c);                    # c = borrow out of the first sub
        &sub($tmp1,$tmp2);              # - *b
        &adc($c,0);                     # c += borrow out of the second sub
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));              # get num
    &and($num,7);
    &jz(&label("aw_end"));

    for ($i=0; $i<7; $i++)
        {
        &comment("Tail Round $i");
        &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
        &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if ($i != 6);
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        &jz(&label("aw_end")) if ($i != 6);
        }
    &set_label("aw_end",0);

#   &mov("eax",$c);     # $c is "eax"

    &function_end($name);
    }

# bn_sub_part_words($name)
#
# Not currently emitted: its call at the top of this file is commented out.
#
# Emits a routine that reads r (param 0), a (param 1), b (param 2),
# num (param 3) and dl (param 4).  The first part is the same word-by-word
# subtract-with-borrow as bn_sub_words over num words, except that the tail
# advances the a/b/r pointers so they point past the processed words.  Then,
# depending on the sign of dl:
#   dl == 0: nothing more is done;
#   dl <  0: for -dl further words, r[i] = 0 - b[i] - c;
#   dl >  0: for dl further words, r[i] = a[i] - c; as soon as a subtraction
#            produces no borrow, control jumps into a plain copy of the
#            remaining words of a to r and c is set to 0.
# The borrow c lives in eax.
sub bn_sub_part_words
    {
    local($name)=@_;

    &function_begin($name,"");

    &comment("");
    $a="esi";
    $b="edi";
    $c="eax";
    $r="ebx";
    $tmp1="ecx";
    $tmp2="edx";
    $num="ebp";

    &mov($r,&wparam(0));                # get r
    &mov($a,&wparam(1));                # get a
    &mov($b,&wparam(2));                # get b
    &mov($num,&wparam(3));              # get num
    &xor($c,$c);                        # clear carry
    &and($num,0xfffffff8);              # num rounded down to a multiple of 8

    &jz(&label("aw_finish"));

    &set_label("aw_loop",0);
    for ($i=0; $i<8; $i++)
        {
        &comment("Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
        &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        }

    &comment("");
    &add($a,32);
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("aw_loop"));

    &set_label("aw_finish",0);
    &mov($num,&wparam(3));              # get num
    &and($num,7);
    &jz(&label("aw_end"));

    for ($i=0; $i<7; $i++)
        {
        &comment("Tail Round $i");
        &mov($tmp1,&DWP(0,$a,"",0));    # *a
        &mov($tmp2,&DWP(0,$b,"",0));    # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &mov(&DWP(0,$r,"",0),$tmp1);    # *r
        # Pointers are advanced here (unlike bn_sub_words) because the
        # dl-handling code below continues from them.
        &add($a, 4);
        &add($b, 4);
        &add($r, 4);
        &dec($num) if ($i != 6);
        &jz(&label("aw_end")) if ($i != 6);
        }
    &set_label("aw_end",0);

    &cmp(&wparam(4),0);
    &je(&label("pw_end"));

    &mov($num,&wparam(4));              # get dl
    &cmp($num,0);
    &je(&label("pw_end"));
    &jge(&label("pw_pos"));

    &comment("pw_neg");
    &mov($tmp2,0);
    &sub($tmp2,$num);
    &mov($num,$tmp2);                   # num = -dl
    &and($num,0xfffffff8);              # -dl rounded down to a multiple of 8
    &jz(&label("pw_neg_finish"));

    &set_label("pw_neg_loop",0);
    for ($i=0; $i<8; $i++)
        {
        &comment("dl<0 Round $i");

        &mov($tmp1,0);
        &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        }

    &comment("");
    &add($b,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_neg_loop"));

    &set_label("pw_neg_finish",0);
    &mov($tmp2,&wparam(4));             # get dl
    &mov($num,0);
    &sub($num,$tmp2);                   # num = -dl
    &and($num,7);
    &jz(&label("pw_end"));

    for ($i=0; $i<7; $i++)
        {
        &comment("dl<0 Tail Round $i");
        &mov($tmp1,0);
        &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
        &sub($tmp1,$c);
        &mov($c,0);
        &adc($c,$c);
        &sub($tmp1,$tmp2);
        &adc($c,0);
        &dec($num) if ($i != 6);
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        &jz(&label("pw_end")) if ($i != 6);
        }

    &jmp(&label("pw_end"));

    &set_label("pw_pos",0);

    &and($num,0xfffffff8);              # dl rounded down to a multiple of 8
    &jz(&label("pw_pos_finish"));

    &set_label("pw_pos_loop",0);

    for ($i=0; $i<8; $i++)
        {
        &comment("dl>0 Round $i");

        &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
        &sub($tmp1,$c);
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        # No borrow out: switch to the plain copy loop at the matching slot.
        &jnc(&label("pw_nc".$i));
        }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_pos_loop"));

    &set_label("pw_pos_finish",0);
    &mov($num,&wparam(4));              # get dl
    &and($num,7);
    &jz(&label("pw_end"));

    for ($i=0; $i<7; $i++)
        {
        &comment("dl>0 Tail Round $i");
        &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
        &sub($tmp1,$c);
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        &jnc(&label("pw_tail_nc".$i));
        &dec($num) if ($i != 6);
        &jz(&label("pw_end")) if ($i != 6);
        }
    &mov($c,1);
    &jmp(&label("pw_end"));

    &set_label("pw_nc_loop",0);
    for ($i=0; $i<8; $i++)
        {
        &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        # Entry point from pw_pos_loop: placed after the copy of word $i,
        # which the jumping code has already stored.
        &set_label("pw_nc".$i,0);
        }

    &comment("");
    &add($a,32);
    &add($r,32);
    &sub($num,8);
    &jnz(&label("pw_nc_loop"));

    &mov($num,&wparam(4));              # get dl
    &and($num,7);
    &jz(&label("pw_nc_end"));

    for ($i=0; $i<7; $i++)
        {
        &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
        &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
        &set_label("pw_tail_nc".$i,0);
        &dec($num) if ($i != 6);
        &jz(&label("pw_nc_end")) if ($i != 6);
        }

    &set_label("pw_nc_end",0);
    &mov($c,0);

    &set_label("pw_end",0);

#   &mov("eax",$c);     # $c is "eax"

    &function_end($name);
    }