#!/usr/local/bin/perl
#
# bn-586.pl revision 109998
#
# Generator script for seven word-level bignum routines:
#   bn_mul_add_words, bn_mul_words, bn_sqr_words, bn_div_words,
#   bn_add_words, bn_sub_words, bn_sub_part_words.
#
# Every routine is described as a sequence of calls to helper subs
# (&mov, &mul, &adc, &label, &wparam, ...) that are not defined in this file;
# presumably they are supplied by x86asm.pl -- verify against that module.
# Comments below assume each &<op> helper emits the x86 instruction of the
# same name, and that &wparam(N) addresses the N-th word-sized argument.
#
# NOTE(review): the register-name variables ($a, $b, $r, $c, ...) are plain
# package globals and $name is dynamically scoped with local(); both are kept
# as-is because helpers outside this file could, in principle, read them.

push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";

# $ARGV[0] and the script name are handed to the assembler back end.
&asm_init($ARGV[0],$0);

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

# bn_mul_add_words($name)
#
# Emits a routine named $name taking (r, a, num, w) as wparam(0..3).
# For each of num words it computes t = a[i]*w + r[i] + c, stores the low
# word of t back to r[i] and keeps the high word as the running carry c.
# The final carry is moved to eax before function_end (presumably the
# return value -- confirm against the calling convention in x86asm.pl).
#
# Layout: an 8-way unrolled main loop over (num & ~7) words followed by an
# unrolled tail of up to 7 words.
sub bn_mul_add_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$Low="eax";	# not referenced again in this file
	$High="edx";	# not referenced again in this file
	$a="ebx";
	$w="ebp";
	$r="edi";
	$c="esi";

	&xor($c,$c);			# clear carry
	&mov($r,&wparam(0));		# r

	&mov("ecx",&wparam(2));		# num
	&mov($a,&wparam(1));		# a

	&and("ecx",0xfffffff8);		# num & ~7: words handled by the unrolled loop
	&mov($w,&wparam(3));		# w

	&push("ecx");			# reserve one stack slot, used as &swtmp(0)

	&jz(&label("maw_finish"));	# taken when the masked count above is zero

	&set_label("maw_loop",0);

	&mov(&swtmp(0),"ecx");		# save the remaining count in the stack slot

	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&mov($c,&DWP($i,$r,"",0));	# load *r (reuses the carry register)
		&adc("edx",0);			# H(t)+=carry
		&add("eax",$c);			# L(t)+= *r
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c=  H(t);
		}

	&comment("");
	&mov("ecx",&swtmp(0));		# reload the remaining count
	&add($a,32);
	&add($r,32);
	&sub("ecx",8);
	&jnz(&label("maw_loop"));

	&set_label("maw_finish",0);
	&mov("ecx",&wparam(2));		# get num
	&and("ecx",7);			# leftover words (0..7)
	&jnz(&label("maw_finish2"));	# helps branch prediction
	&jmp(&label("maw_end"));

	&set_label("maw_finish2",1);
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&mov($c,&DWP($i*4,$r,"",0));	# load *r
		&adc("edx",0);			# H(t)+=carry
		&add("eax",$c);			# L(t)+= *r
		&adc("edx",0);			# H(t)+=carry
		&dec("ecx") if ($i != 7-1);	# the stores below sit between this dec and its jz
		&mov(&DWP($i*4,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c=  H(t);
		&jz(&label("maw_end")) if ($i != 7-1);
		}
	&set_label("maw_end",0);
	&mov("eax",$c);			# final carry

	&pop("ecx");			# discard the stack slot pushed above

	&function_end($name);
	}

# bn_mul_words($name)
#
# Emits a routine named $name taking (r, a, num, w) as wparam(0..3).
# For each of num words it computes t = a[i]*w + c, stores the low word to
# r[i] and carries the high word forward in c. The final carry is moved to
# eax before function_end.
sub bn_mul_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$Low="eax";	# not referenced again in this file
	$High="edx";	# not referenced again in this file
	$a="ebx";
	$w="ecx";
	$r="edi";
	$c="esi";
	$num="ebp";

	&xor($c,$c);			# clear carry
	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($num,&wparam(2));		# num
	&mov($w,&wparam(3));		# w

	&and($num,0xfffffff8);		# num & ~7: words handled by the unrolled loop
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");

		&mov("eax",&DWP($i,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c

		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i,$r,"",0),"eax");	# *r= L(t);

		&mov($c,"edx");			# c=  H(t);
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jz(&label("mw_finish"));
	&jmp(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($num,&wparam(2));		# get num
	&and($num,7);			# leftover words (0..7)
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add("eax",$c);			# L(t)+=c
		&adc("edx",0);			# H(t)+=carry
		&mov(&DWP($i*4,$r,"",0),"eax");	# *r= L(t);
		&mov($c,"edx");			# c=  H(t);
		&dec($num) if ($i != 7-1);
		&jz(&label("mw_end")) if ($i != 7-1);
		}
	&set_label("mw_end",0);
	&mov("eax",$c);			# final carry

	&function_end($name);
	}

# bn_sqr_words($name)
#
# Emits a routine named $name taking (r, a, num) as wparam(0..2).
# Each input word a[i] is multiplied by itself; the low and high words of
# the product are stored to r[2*i] and r[2*i+1] respectively, so r advances
# twice as fast as a.
sub bn_sqr_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$r="esi";
	$a="edi";
	$num="ebx";

	&mov($r,&wparam(0));		# r
	&mov($a,&wparam(1));		# a
	&mov($num,&wparam(2));		# num

	&and($num,0xfffffff8);		# num & ~7: words handled by the unrolled loop
	&jz(&label("sw_finish"));

	&set_label("sw_loop",0);
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");
		&mov("eax",&DWP($i,$a,"",0));		# *a
		&mul("eax");				# *a * *a
		&mov(&DWP($i*2,$r,"",0),"eax");		# low word of the square
		&mov(&DWP($i*2+4,$r,"",0),"edx");	# high word of the square
		}

	&comment("");
	&add($a,32);
	&add($r,64);
	&sub($num,8);
	&jnz(&label("sw_loop"));

	&set_label("sw_finish",0);
	&mov($num,&wparam(2));		# get num
	&and($num,7);			# leftover words (0..7)
	&jz(&label("sw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));		# *a
		&mul("eax");				# *a * *a
		&mov(&DWP($i*8,$r,"",0),"eax");		# low word of the square
		&dec($num) if ($i != 7-1);		# the store below sits between this dec and its jz
		&mov(&DWP($i*8+4,$r,"",0),"edx");	# high word of the square
		&jz(&label("sw_end")) if ($i != 7-1);
		}
	&set_label("sw_end",0);

	&function_end($name);
	}

# bn_div_words($name)
#
# Emits a routine named $name that loads wparam(0) into edx, wparam(1) into
# eax and wparam(2) into ebx, then issues a single &div("ebx").
# Assuming &div emits the x86 DIV instruction, this divides the two-word
# value edx:eax by ebx and leaves the quotient in eax.
sub bn_div_words
	{
	local($name)=@_;

	&function_begin($name,"");
	&mov("edx",&wparam(0));		# first argument  -> edx
	&mov("eax",&wparam(1));		# second argument -> eax
	&mov("ebx",&wparam(2));		# third argument  -> ebx
	&div("ebx");
	&function_end($name);
	}

# bn_add_words($name)
#
# Emits a routine named $name taking (r, a, b, num) as wparam(0..3).
# For each of num words it stores a[i] + b[i] + c to r[i] and recomputes the
# carry c, which lives in eax and is therefore what is left in eax when the
# routine ends.
#
# Per word: the incoming carry is added to a[i] first and the carry-out of
# that addition is captured in c; b[i] is then added and its carry-out is
# accumulated into c as well.
sub bn_add_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear carry
	&and($num,0xfffffff8);		# num & ~7: words handled by the unrolled loop

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&add($tmp1,$c);			# *a + c
		&mov($c,0);
		&adc($c,$c);			# c = carry out of (*a + c)
		&add($tmp1,$tmp2);		# + *b
		&adc($c,0);			# c += carry out of the second add
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);			# leftover words (0..7)
	&jz(&label("aw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&add($tmp1,$c);			# *a + c
		&mov($c,0);
		&adc($c,$c);			# c = carry out of (*a + c)
		&add($tmp1,$tmp2);		# + *b
		&adc($c,0);			# c += carry out of the second add
		&dec($num) if ($i != 6);	# the store below sits between this dec and its jz
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jz(&label("aw_end")) if ($i != 6);
		}
	&set_label("aw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}

# bn_sub_words($name)
#
# Emits a routine named $name taking (r, a, b, num) as wparam(0..3).
# For each of num words it stores a[i] - b[i] - c to r[i] and recomputes the
# borrow c, which lives in eax and is therefore what is left in eax when the
# routine ends.
#
# NOTE(review): the label names ("aw_loop", "aw_finish", "aw_end") are the
# same strings bn_add_words uses; this presumably relies on &label() scoping
# names per generated function -- confirm in x86asm.pl.
sub bn_sub_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear borrow
	&and($num,0xfffffff8);		# num & ~7: words handled by the unrolled loop

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);			# *a - c
		&mov($c,0);
		&adc($c,$c);			# c = borrow out of (*a - c)
		&sub($tmp1,$tmp2);		# - *b
		&adc($c,0);			# c += borrow out of the second sub
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);			# leftover words (0..7)
	&jz(&label("aw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);			# *a - c
		&mov($c,0);
		&adc($c,$c);			# c = borrow out of (*a - c)
		&sub($tmp1,$tmp2);		# - *b
		&adc($c,0);			# c += borrow out of the second sub
		&dec($num) if ($i != 6);	# the store below sits between this dec and its jz
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jz(&label("aw_end")) if ($i != 6);
		}
	&set_label("aw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}

# bn_sub_part_words($name)
#
# Emits a routine named $name taking (r, a, b, num, dl) as wparam(0..4).
#
# Phase 1 is the same word-by-word subtraction as bn_sub_words over num
# words, except that the tail advances a, b and r by one word per round so
# that all three pointers sit just past the common part afterwards.
#
# Phase 2 depends on the sign of dl:
#   dl == 0 : nothing more to do.
#   dl <  0 : for -dl further words, r[i] = 0 - b[i] - c.
#   dl >  0 : for dl further words, r[i] = a[i] - c. As soon as a round
#             produces no borrow the code jumps into a plain copy loop
#             (labels pw_nc*/pw_tail_nc*) that finishes with c = 0; if the
#             borrow survives every tail round, c is set to 1.
# The borrow c lives in eax and is what is left in eax at function_end.
#
# NOTE(review): the "aw_*" label names duplicate those of bn_add_words and
# bn_sub_words; presumably &label() scopes names per generated function --
# confirm in x86asm.pl.
sub bn_sub_part_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));		# get r
	&mov($a,&wparam(1));		# get a
	&mov($b,&wparam(2));		# get b
	&mov($num,&wparam(3));		# get num
	&xor($c,$c);			# clear borrow
	&and($num,0xfffffff8);		# num & ~7: words handled by the unrolled loop

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);			# *a - c
		&mov($c,0);
		&adc($c,$c);			# c = borrow out of (*a - c)
		&sub($tmp1,$tmp2);		# - *b
		&adc($c,0);			# c += borrow out of the second sub
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($a,32);
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));		# get num
	&and($num,7);			# leftover words (0..7)
	&jz(&label("aw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP(0,$a,"",0));	# *a
		&mov($tmp2,&DWP(0,$b,"",0));	# *b
		&sub($tmp1,$c);			# *a - c
		&mov($c,0);
		&adc($c,$c);			# c = borrow out of (*a - c)
		&sub($tmp1,$tmp2);		# - *b
		&adc($c,0);			# c += borrow out of the second sub
		&mov(&DWP(0,$r,"",0),$tmp1);	# *r
		&add($a, 4);			# step all three pointers by one word
		&add($b, 4);
		&add($r, 4);
		&dec($num) if ($i != 6);
		&jz(&label("aw_end")) if ($i != 6);
		}
	&set_label("aw_end",0);

	&cmp(&wparam(4),0);		# dl == 0: no extra words
	&je(&label("pw_end"));

	&mov($num,&wparam(4));		# get dl
	&cmp($num,0);
	&je(&label("pw_end"));
	&jge(&label("pw_pos"));

	&comment("pw_neg");
	&mov($tmp2,0);
	&sub($tmp2,$num);
	&mov($num,$tmp2);		# num = -dl
	&and($num,0xfffffff8);		# (-dl) & ~7: words handled by the unrolled loop
	&jz(&label("pw_neg_finish"));

	&set_label("pw_neg_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("dl<0 Round $i");

		&mov($tmp1,0);			# minuend is zero
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);			# 0 - c
		&mov($c,0);
		&adc($c,$c);			# c = borrow out of (0 - c)
		&sub($tmp1,$tmp2);		# - *b
		&adc($c,0);			# c += borrow out of the second sub
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		}

	&comment("");
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_neg_loop"));

	&set_label("pw_neg_finish",0);
	&mov($tmp2,&wparam(4));		# get dl
	&mov($num,0);
	&sub($num,$tmp2);		# num = -dl
	&and($num,7);			# leftover words (0..7)
	&jz(&label("pw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("dl<0 Tail Round $i");
		&mov($tmp1,0);			# minuend is zero
		&mov($tmp2,&DWP($i*4,$b,"",0));	# *b
		&sub($tmp1,$c);			# 0 - c
		&mov($c,0);
		&adc($c,$c);			# c = borrow out of (0 - c)
		&sub($tmp1,$tmp2);		# - *b
		&adc($c,0);			# c += borrow out of the second sub
		&dec($num) if ($i != 6);	# the store below sits between this dec and its jz
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jz(&label("pw_end")) if ($i != 6);
		}

	&jmp(&label("pw_end"));

	&set_label("pw_pos",0);

	&and($num,0xfffffff8);		# dl & ~7: words handled by the unrolled loop
	&jz(&label("pw_pos_finish"));

	&set_label("pw_pos_loop",0);

	for ($i=0; $i<8; $i++)
		{
		&comment("dl>0 Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&sub($tmp1,$c);			# *a - c
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jnc(&label("pw_nc".$i));	# no borrow: continue in the copy loop after word $i
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_pos_loop"));

	&set_label("pw_pos_finish",0);
	&mov($num,&wparam(4));		# get dl
	&and($num,7);			# leftover words (0..7)
	&jz(&label("pw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("dl>0 Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&sub($tmp1,$c);			# *a - c
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&jnc(&label("pw_tail_nc".$i));	# no borrow: continue in the copy tail after word $i
		&dec($num) if ($i != 6);
		&jz(&label("pw_end")) if ($i != 6);
		}
	&mov($c,1);			# borrow survived all seven tail rounds
	&jmp(&label("pw_end"));

	# Copy path: r[i] = a[i] with no borrow pending. Each pw_nc<i> label
	# sits after the copy of word i, which is the entry point for a jump
	# taken right after word i was already stored by the loop above.
	&set_label("pw_nc_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&set_label("pw_nc".$i,0);
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_nc_loop"));

	&mov($num,&wparam(4));		# get dl
	&and($num,7);			# leftover words (0..7)
	&jz(&label("pw_nc_end"));

	for ($i=0; $i<7; $i++)
		{
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&set_label("pw_tail_nc".$i,0);
		&dec($num) if ($i != 6);
		&jz(&label("pw_nc_end")) if ($i != 6);
		}

	&set_label("pw_nc_end",0);
	&mov($c,0);			# copy path always ends with no borrow

	&set_label("pw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}