#!/usr/local/bin/perl
#
# Generator for the x86 "comba" big-number multiply and square routines.
#
# Running this script emits four assembler functions through the perlasm
# x86asm.pl layer:
#
#     bn_mul_comba8, bn_mul_comba4, bn_sqr_comba8, bn_sqr_comba4
#
# Each routine walks the result words r[0] .. r[2*num-1] in order and, for
# every word, sums the partial products a[ai]*b[bi] with ai+bi equal to the
# word index.  The running sum lives in three registers (c0, c1, c2) whose
# roles rotate after each result word is stored.
#
# Usage: perl <this script> <first argument is passed to asm_init()>

# Locate x86asm.pl relative to this script.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],$0);

&bn_mul_comba("bn_mul_comba8",8);
&bn_mul_comba("bn_mul_comba4",4);
&bn_sqr_comba("bn_sqr_comba8",8);
&bn_sqr_comba("bn_sqr_comba4",4);

&asm_finish();

# mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit one multiply-accumulate step: (c2:c1:c0) += eax * edx.
# The caller must already have the two operands in eax and edx.
#
#   $a, $b        registers holding the operand array pointers
#   $ai, $bi      indices of the current operands (used for the comment only)
#   $c0..$c2      accumulator registers, low to high
#   $pos          0: more products follow for this result word; preload
#                    a[$na] into eax and b[$nb] into edx
#                 1: last product of this word; store c0 to r[$i] and
#                    preload a[$na] / b[$nb] for the next word
#                 2: last product of the whole routine; store c0 to r[$i]
#                    and preload nothing
#   $i            index of the result word being computed
#   $na, $nb      indices of the operands needed by the next step
#
# For $pos > 0 the result pointer is fetched with wparam(0) into eax, so
# on return from a $pos == 2 step eax still holds that pointer.
sub mul_add_c {
    local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("mul a[$ai]*b[$bi]");

    # "eax" and "edx" will always be pre-loaded.
    # &mov("eax",&DWP($ai*4,$a,"",0)) ;
    # &mov("edx",&DWP($bi*4,$b,"",0));

    # The loads and stores are interleaved with the add/adc chain; mov does
    # not touch the carry flag, so the chain is unaffected.
    &mul("edx");
    &add($c0,"eax");
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;        # load next a
    &mov("eax",&wparam(0)) if $pos > 0;                    # load r[]
    ###
    &adc($c1,"edx");
    &mov("edx",&DWP(($nb)*4,$b,"",0))
        if ($pos == 0 || $pos == 1);                       # load next b
    ###
    &adc($c2,0);
    # if pos > 1, it is the last step of the routine
    &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;           # save r[]
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;        # load next a
}

# sqr_add_c($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit one accumulate step for the squaring routine, adding the product
# once: (c2:c1:c0) += eax * eax when $ai == $bi, otherwise eax * edx.
#
#   $r            register holding the result pointer
#   $a            register holding the operand array pointer
#   $ai, $bi      indices of the current operands
#   $c0..$c2      accumulator registers, low to high
#   $pos          0: preload a[$na] into eax
#                 1: store c0 to r[$i], then preload a[$na] into eax and,
#                    when $na != $nb, a[$nb] into edx
#                 2: store c0 to r[$i] only
#   $i            index of the result word being computed
#   $na, $nb      indices of the operands needed by the next step
sub sqr_add_c {
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    # "eax" and "edx" will always be pre-loaded.
    # &mov("eax",&DWP($ai*4,$a,"",0)) ;
    # &mov("edx",&DWP($bi*4,$b,"",0));

    if ($ai == $bi) {
        &mul("eax");
    }
    else {
        &mul("edx");
    }
    &add($c0,"eax");
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0;        # load next a[na]
    ###
    &adc($c1,"edx");
    &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
    ###
    &adc($c2,0);
    # if pos > 1, it is the last step of the routine
    &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;              # save r[]
    &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1;        # load next a[na]
}

# sqr_add_c2($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)
#
# Emit one accumulate step for the squaring routine, adding the product
# twice: the edx:eax product is doubled (add/adc on itself, with the carry
# out folded into c2) and then added into (c2:c1:c0).
#
# Parameters are the same as for sqr_add_c.  Preloads differ slightly:
# a[$na] goes into eax when $pos is 0 or 1, and a[$nb] goes into edx when
# $pos <= 1 and $na != $nb.  c0 is stored to r[$i] when $pos > 0.
sub sqr_add_c2 {
    local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

    &comment("sqr a[$ai]*a[$bi]");

    # "eax" and "edx" will always be pre-loaded.
    # &mov("eax",&DWP($ai*4,$a,"",0)) ;
    # &mov("edx",&DWP($bi*4,$a,"",0));

    if ($ai == $bi) {
        &mul("eax");
    }
    else {
        &mul("edx");
    }
    &add("eax","eax");          # double the low half ...
    ###
    &adc("edx","edx");          # ... and the high half
    ###
    &adc($c2,0);                # carry out of the doubling
    &add($c0,"eax");
    &adc($c1,"edx");
    &mov("eax",&DWP(($na)*4,$a,"",0))
        if ($pos == 0 || $pos == 1);                       # load next a[na]
    &adc($c2,0);
    &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;              # save r[]
    &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
    ###
}

# bn_mul_comba($name,$num)
#
# Emit the function $name, which multiplies two $num-word operands into a
# 2*$num-word result.  The operand pointers are read from wparam(1) and
# wparam(2); the result pointer is read from wparam(0) inside mul_add_c
# whenever a result word is stored.
sub bn_mul_comba {
    local($name,$num)=@_;
    local($a,$b,$c0,$c1,$c2);
    local($i,$as,$ae,$bs,$be,$ai,$bi);
    local($tot,$end);
    local($j,$v,$na,$nb);

    &function_begin_B($name,"");

    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $b="edi";

    # $as/$bs: first a/b index used for the current result word.
    # $be:     last b index used for the current result word.
    # $ae:     maintained alongside them but not read anywhere.
    $as=0;
    $ae=0;
    $bs=0;
    $be=0;
    $tot=$num+$num-1;           # number of result words produced by the loop

    &push("esi");
    &mov($a,&wparam(1));
    &push("edi");
    &mov($b,&wparam(2));
    &push("ebp");
    &push("ebx");

    &xor($c0,$c0);
    &mov("eax",&DWP(0,$a,"",0));        # load the first word of a
    &xor($c1,$c1);
    &mov("edx",&DWP(0,$b,"",0));        # load the first word of b

    for ($i=0; $i<$tot; $i++) {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("################## Calculate word $i");

        for ($j=$bs; $j<$end; $j++) {
            # c2 becomes the new top accumulator word for this result word.
            &xor($c2,$c2) if ($j == $bs);

            if (($j+1) == $end) {
                # Last product of this word: 1 normally, 2 at the very end.
                $v=1;
                $v=2 if (($i+1) == $tot);
                # Operands that start the next result word.
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
            }
            else {
                $v=0;
                # Next product along the same anti-diagonal.
                $na=($ai-1);
                $nb=($bi+1);
            }
#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
            &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
            if ($v) {
                &comment("saved r[$i]");
                # &mov("eax",&wparam(0));
                # &mov(&DWP($i*4,"eax","",0),$c0);
                # Rotate the accumulator: the stored word drops out and its
                # register is reused as the next top word.
                ($c0,$c1,$c2)=($c1,$c2,$c0);
            }
            $ai--;
            $bi++;
        }
        $as++ if ($i <  ($num-1));
        $ae++ if ($i >= ($num-1));

        $bs++ if ($i >= ($num-1));
        $be++ if ($i <  ($num-1));
    }

    # $i == $tot here: store the final (top) result word.  eax still holds
    # the result pointer loaded by the last mul_add_c step ($pos == 2).
    &comment("save r[$i]");
    # &mov("eax",&wparam(0));
    &mov(&DWP($i*4,"eax","",0),$c0);

    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}

# bn_sqr_comba($name,$num)
#
# Emit the function $name, which squares a $num-word operand into a
# 2*$num-word result.  The result pointer is read from wparam(0) and the
# operand pointer from wparam(1).  Only products with ai >= bi are
# generated: off-diagonal products are added twice through sqr_add_c2 and
# diagonal ones once through sqr_add_c.
sub bn_sqr_comba {
    local($name,$num)=@_;
    local($r,$a,$c0,$c1,$c2);
    local($i,$as,$ae,$bs,$be,$ai,$bi);
    local($tot,$end);
    local($j,$v,$na,$nb);

    &function_begin_B($name,"");

    $c0="ebx";
    $c1="ecx";
    $c2="ebp";
    $a="esi";
    $r="edi";

    &push("esi");
    &push("edi");
    &push("ebp");
    &push("ebx");
    &mov($r,&wparam(0));
    &mov($a,&wparam(1));
    &xor($c0,$c0);
    &xor($c1,$c1);
    &mov("eax",&DWP(0,$a,"",0));        # load the first word of a

    # Same index bookkeeping as in bn_mul_comba; $ae is not read anywhere.
    $as=0;
    $ae=0;
    $bs=0;
    $be=0;
    $tot=$num+$num-1;           # number of result words produced by the loop

    for ($i=0; $i<$tot; $i++) {
        $ai=$as;
        $bi=$bs;
        $end=$be+1;

        &comment("############### Calculate word $i");
        for ($j=$bs; $j<$end; $j++) {
            # c2 becomes the new top accumulator word for this result word.
            &xor($c2,$c2) if ($j == $bs);

            if (($ai-1) < ($bi+1)) {
                # The next pair would cross the diagonal, so this is the
                # last product of the word: 1 normally, 2 at the very end.
                $v=1;
                $v=2 if ($i+1) == $tot;
                # Operands that start the next result word.
                $na=$as+($i < ($num-1));
                $nb=$bs+($i >= ($num-1));
            }
            else {
                $v=0;
                # Next product along the same anti-diagonal.
                $na=$ai-1;
                $nb=$bi+1;
            }

            if ($ai == $bi) {
                &sqr_add_c($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }
            else {
                &sqr_add_c2($r,$a,$ai,$bi,
                    $c0,$c1,$c2,$v,$i,$na,$nb);
            }
            if ($v) {
                &comment("saved r[$i]");
                #&mov(&DWP($i*4,$r,"",0),$c0);
                # Rotate the accumulator: the stored word drops out and its
                # register is reused as the next top word.
                ($c0,$c1,$c2)=($c1,$c2,$c0);
                last;
            }
            $ai--;
            $bi++;
        }
        $as++ if ($i <  ($num-1));
        $ae++ if ($i >= ($num-1));

        $bs++ if ($i >= ($num-1));
        $be++ if ($i <  ($num-1));
    }

    # $i == $tot here: store the final (top) result word.
    &mov(&DWP($i*4,$r,"",0),$c0);
    &pop("ebx");
    &pop("ebp");
    &pop("edi");
    &pop("esi");
    &ret();
    &function_end_B($name);
}