#!/usr/local/bin/perl
#
# Generator script for the x86 "comba" big-number routines.  It pulls in
# x86asm.pl and then asks the subs defined further down in this file to
# emit four functions: bn_mul_comba8/4 and bn_sqr_comba8/4.
#
# $ARGV[0] and $0 are handed straight to &asm_init(); what they mean is
# decided by x86asm.pl (presumably the assembler flavour and the script
# name -- confirm there).

# Everything in $0 up to and including the last '/' or '\' is this script's
# directory.  $dir stays undefined when $0 has no directory part, in which
# case the @INC entries below degrade to "" and "../../perlasm".
($dir) = $0 =~ m/(.*[\/\\])[^\/\\]+$/;
push(@INC, "${dir}", "${dir}../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0], $0);

# Emission order is significant: it is the order of the generated code.
&bn_mul_comba("bn_mul_comba8", 8);
&bn_mul_comba("bn_mul_comba4", 4);
&bn_sqr_comba("bn_sqr_comba8", 8);
&bn_sqr_comba("bn_sqr_comba4", 4);

&asm_finish();
1555714Skris
# mul_add_c - emit one multiply-accumulate step of a comba multiplication:
#
#	(c2:c1:c0) += eax * edx
#
# "eax" and "edx" must already hold a[$ai] and b[$bi]; this sub never loads
# the current operands, only the ones needed by the *next* step.
#
# Arguments (positional):
#	$ap, $bp	registers holding the base addresses of a[] and b[]
#	$ai, $bi	indices of the words being multiplied (used only in
#			the emitted comment)
#	$c0,$c1,$c2	registers forming the three-word accumulator,
#			least significant first
#	$pos		what to emit around the multiply-accumulate:
#			  0  more products follow in this column: preload
#			     eax=a[$na] and edx=b[$nb]
#			  1  last product of a column: load &wparam(0) into
#			     eax, store $c0 through it at word offset $i,
#			     then preload edx=b[$nb] and eax=a[$na]
#			  2  last product overall: as 1, but nothing is
#			     preloaded, so eax is left holding &wparam(0)
#			any other value emits the bare multiply-accumulate
#	$i		index of the result word stored when $pos > 0
#	$na, $nb	indices of the next a[] / b[] words to preload
#
# The loads are interleaved with the add/adc chain (lines indented by one
# extra space), presumably for instruction scheduling; "mov" does not alter
# the carry flag, so the chain is unaffected.
sub mul_add_c
	{
	# Lexicals, not local(): nothing here needs dynamic scope, and
	# local($a,$b) would temporarily replace sort's package variables.
	my($ap,$ai,$bp,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("mul a[$ai]*b[$bi]");

	&mul("edx");					# edx:eax = eax * edx
	&add($c0,"eax");
	 &mov("eax",&DWP(($na)*4,$ap,"",0)) if $pos == 0;	# load next a
	 &mov("eax",&wparam(0)) if $pos > 0;		# load r[]
	&adc($c1,"edx");
	 # edx is free again once the high word has been added
	 &mov("edx",&DWP(($nb)*4,$bp,"",0)) if $pos == 0 || $pos == 1;	# load next b
	&adc($c2,0);
	 &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0;	# save r[$i]
	# eax was used as the r[] pointer above, so a[$na] is fetched last
	&mov("eax",&DWP(($na)*4,$ap,"",0)) if $pos == 1;	# load next a
	}
4355714Skris
# sqr_add_c - emit one un-doubled multiply-accumulate step of a comba
# squaring:
#
#	(c2:c1:c0) += eax * eax		when $ai == $bi
#	(c2:c1:c0) += eax * edx		otherwise
#
# "eax" (and "edx" when $ai != $bi) must already hold the operand words;
# only the operands of the *next* step are loaded here.
#
# Arguments (positional):
#	$r		register holding the base address of the result r[]
#	$ap		register holding the base address of a[]
#	$ai, $bi	indices of the words being multiplied
#	$c0,$c1,$c2	registers forming the three-word accumulator,
#			least significant first
#	$pos		what to emit around the multiply-accumulate:
#			  0  preload eax=a[$na] for the next step
#			  1  store $c0 to r[$i], then preload eax=a[$na]
#			     and, when $na != $nb, edx=a[$nb]
#			  2  store $c0 to r[$i]; nothing is preloaded
#	$i		index of the result word stored when $pos > 0
#	$na, $nb	indices of the next pair of a[] words
#
# Lines indented by one extra space are loads/stores interleaved with the
# add/adc chain; "mov" leaves the carry flag untouched.
sub sqr_add_c
	{
	# Lexicals, not local(): nothing here needs dynamic scope, and
	# local($a) would temporarily replace sort's package variable.
	my($r,$ap,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	# A true square needs only eax; otherwise the partner word is in edx.
	&mul($ai == $bi ? "eax" : "edx");
	&add($c0,"eax");
	 &mov("eax",&DWP(($na)*4,$ap,"",0)) if $pos == 0;	# load next a[$na]
	&adc($c1,"edx");
	 # when $na == $nb the next step is itself a square and needs no edx
	 &mov("edx",&DWP(($nb)*4,$ap,"",0)) if ($pos == 1) && ($na != $nb);
	&adc($c2,0);
	 &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;	# save r[$i]
	&mov("eax",&DWP(($na)*4,$ap,"",0)) if $pos == 1;	# load next a[$na]
	}
7255714Skris
# sqr_add_c2 - emit one doubled multiply-accumulate step of a comba
# squaring:
#
#	(c2:c1:c0) += 2 * eax * edx	(2 * eax * eax when $ai == $bi)
#
# The 64-bit product in edx:eax is doubled in place with add/adc; the bit
# shifted out of edx is folded into $c2 before the doubled value is added
# to $c0/$c1.  "eax" and "edx" must already hold the operand words.
#
# Arguments (positional):
#	$r		register holding the base address of the result r[]
#	$ap		register holding the base address of a[]
#	$ai, $bi	indices of the words being multiplied
#	$c0,$c1,$c2	registers forming the three-word accumulator,
#			least significant first
#	$pos		what to emit around the multiply-accumulate:
#			  0  preload eax=a[$na] and, when $na != $nb,
#			     edx=a[$nb]
#			  1  as 0, and also store $c0 to r[$i]
#			  2  store $c0 to r[$i]; nothing is preloaded
#	$i		index of the result word stored when $pos > 0
#	$na, $nb	indices of the next pair of a[] words
#
# Lines indented by one extra space are interleaved with the carry chain;
# "mov" leaves the carry flag untouched, so the final adc still sees the
# carry produced by the preceding adc.
sub sqr_add_c2
	{
	# Lexicals, not local(): nothing here needs dynamic scope, and
	# local($a) would temporarily replace sort's package variable.
	my($r,$ap,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;

	&comment("sqr a[$ai]*a[$bi]");

	&mul($ai == $bi ? "eax" : "edx");		# edx:eax = product
	&add("eax","eax");				# double the low word
	&adc("edx","edx");				# double the high word
	&adc($c2,0);					# bit shifted out of edx
	 &add($c0,"eax");
	&adc($c1,"edx");
	 # eax is free once the low word has been added
	 &mov("eax",&DWP(($na)*4,$ap,"",0)) if $pos == 0 || $pos == 1;	# load next a[$na]
	&adc($c2,0);
	&mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0;	# save r[$i]
	 # when $na == $nb the next step is a plain square and needs no edx
	 &mov("edx",&DWP(($nb)*4,$ap,"",0)) if ($pos <= 1) && ($na != $nb);
	}
10455714Skris
# bn_mul_comba - emit a complete function named $name that multiplies two
# $num-word numbers column by column ("comba" order).
#
# Arguments:
#	$name	name handed to &function_begin_B / &function_end_B
#	$num	number of 32-bit words in each operand
#
# Generated code, as far as this file shows:
#   - &wparam(1) and &wparam(2) are loaded into esi and edi and used as the
#     a[] and b[] base addresses; &wparam(0) is used by &mul_add_c as the
#     base address of the result r[].
#   - ebx/ecx/ebp form a three-word accumulator that is rotated after each
#     result word, so the register called $c0 always holds the word about
#     to be stored.
#   - Result word $i (0 .. 2*$num-2) accumulates every a[ai]*b[bi] with
#     ai+bi == $i; the leftover carry word is stored as r[2*$num-1].
#
# Lines indented by one extra space are instructions interleaved with
# their neighbours, presumably for scheduling.
sub bn_mul_comba
	{
	# All state is lexical; the original leaked $j, $v, $na and $nb as
	# package globals and kept an $ae counter that was never read.
	my($name,$num)=@_;

	my($c0,$c1,$c2)=("ebx","ecx","ebp");	# accumulator, low word first
	my($ap,$bp)=("esi","edi");		# a[] and b[] base registers

	my $tot=$num+$num-1;			# number of product columns
	my $as=0;				# a[] index a column starts at
	my $bs=0;				# b[] index a column starts at
	my $be=0;				# b[] index a column ends at

	&function_begin_B($name,"");

	&push("esi");
	 &mov($ap,&wparam(1));
	&push("edi");
	 &mov($bp,&wparam(2));
	&push("ebp");
	 &push("ebx");

	&xor($c0,$c0);
	 &mov("eax",&DWP(0,$ap,"",0));	# preload a[0]
	&xor($c1,$c1);
	 &mov("edx",&DWP(0,$bp,"",0));	# preload b[0]

	for (my $i=0; $i<$tot; $i++)
		{
		# Columns get longer while $i < $num-1 and shorter afterwards.
		my $growing=($i < ($num-1));
		my($ai,$bi)=($as,$bs);
		my $end=$be+1;

		&comment("################## Calculate word $i");

		for (my $j=$bs; $j<$end; $j++)
			{
			my($v,$na,$nb);

			# $c2 still holds a previously stored word; clear it
			# before the first product of the column.
			&xor($c2,$c2) if ($j == $bs);

			if (($j+1) == $end)
				{
				# Last product of this column: store the
				# result word and preload the first operands
				# of the next column ($v == 2: no next one).
				$v=(($i+1) == $tot) ? 2 : 1;
				$na=$as+($growing ? 1 : 0);
				$nb=$bs+($growing ? 0 : 1);
				}
			else
				{
				# Walk down a[] and up b[] within the column.
				$v=0;
				$na=$ai-1;
				$nb=$bi+1;
				}

			&mul_add_c($ap,$ai,$bp,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);

			if ($v)
				{
				# r[$i] was stored by &mul_add_c; rotate the
				# accumulator so the next word becomes $c0.
				&comment("saved r[$i]");
				($c0,$c1,$c2)=($c1,$c2,$c0);
				}
			$ai--;
			$bi++;
			}

		if ($growing)
			{ $as++; $be++; }
		else
			{ $bs++; }
		}

	# The last &mul_add_c call ($v == 2) leaves &wparam(0) in eax, so the
	# remaining carry word can be stored through it directly.
	&comment("save r[$tot]");
	&mov(&DWP($tot*4,"eax","",0),$c0);

	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
19555714Skris
# bn_sqr_comba - emit a complete function named $name that squares a
# $num-word number column by column ("comba" order).
#
# Arguments:
#	$name	name handed to &function_begin_B / &function_end_B
#	$num	number of 32-bit words in the operand
#
# Generated code, as far as this file shows:
#   - &wparam(0) and &wparam(1) are loaded into edi and esi and used as the
#     r[] (result) and a[] (operand) base addresses.
#   - ebx/ecx/ebp form a three-word accumulator that is rotated after each
#     result word, so the register called $c0 always holds the word about
#     to be stored.
#   - For result word $i the index pair (ai,bi) starts at the ends of the
#     column and moves inwards.  Pairs with ai != bi are added twice (via
#     &sqr_add_c2) so the mirrored half of the column is never visited; the
#     pair with ai == bi, if any, is added once (via &sqr_add_c).  The
#     column is finished as soon as the indices meet or become adjacent.
#   - The leftover carry word is stored as r[2*$num-1].
#
# Lines indented by one extra space are instructions interleaved with
# their neighbours, presumably for scheduling.
sub bn_sqr_comba
	{
	# All state is lexical.  The original seeded $r/$a/$c0.. from @_ by
	# mistake (harmless, they were overwritten), declared unused $b,
	# $half and $ae, and leaked $j, $v, $na and $nb as package globals.
	my($name,$num)=@_;

	my($c0,$c1,$c2)=("ebx","ecx","ebp");	# accumulator, low word first
	my($ap,$r)=("esi","edi");		# a[] and r[] base registers

	my $tot=$num+$num-1;			# number of product columns
	my $as=0;				# higher a[] index of a column
	my $bs=0;				# lower a[] index of a column
	my $be=0;				# upper bound for the inner loop

	&function_begin_B($name,"");

	&push("esi");
	 &push("edi");
	&push("ebp");
	 &push("ebx");
	&mov($r,&wparam(0));
	 &mov($ap,&wparam(1));
	&xor($c0,$c0);
	 &xor($c1,$c1);
	&mov("eax",&DWP(0,$ap,"",0));	# preload a[0]

	for (my $i=0; $i<$tot; $i++)
		{
		# Columns get longer while $i < $num-1 and shorter afterwards.
		my $growing=($i < ($num-1));
		my($ai,$bi)=($as,$bs);
		my $end=$be+1;

		&comment("############### Calculate word $i");
		for (my $j=$bs; $j<$end; $j++)
			{
			my($v,$na,$nb);

			# $c2 still holds a previously stored word; clear it
			# before the first product of the column.
			&xor($c2,$c2) if ($j == $bs);

			if (($ai-1) < ($bi+1))
				{
				# The indices have met or are adjacent: this
				# is the last product of the column.  Store
				# the word and preload for the next column
				# ($v == 2: there is no next column).
				$v=(($i+1) == $tot) ? 2 : 1;
				$na=$as+($growing ? 1 : 0);
				$nb=$bs+($growing ? 0 : 1);
				}
			else
				{
				# Move both indices one step inwards.
				$v=0;
				$na=$ai-1;
				$nb=$bi+1;
				}

			if ($ai == $bi)
				{
				&sqr_add_c($r,$ap,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}
			else
				{
				&sqr_add_c2($r,$ap,$ai,$bi,
					$c0,$c1,$c2,$v,$i,$na,$nb);
				}

			if ($v)
				{
				# r[$i] was stored by the helper; rotate the
				# accumulator and skip the mirrored half.
				&comment("saved r[$i]");
				($c0,$c1,$c2)=($c1,$c2,$c0);
				last;
				}
			$ai--;
			$bi++;
			}

		if ($growing)
			{ $as++; $be++; }
		else
			{ $bs++; }
		}

	# Store the remaining carry word.
	&mov(&DWP($tot*4,$r,"",0),$c0);
	&pop("ebx");
	&pop("ebp");
	&pop("edi");
	&pop("esi");
	&ret();
	&function_end_B($name);
	}
288