#!/usr/local/bin/perl
# bn-586.pl revision 127128
#
# Driver section: loads the x86asm.pl emitter, then calls each bn_*
# generator sub defined later in this file so that it emits its routine.
#
# The emitter helpers are invoked with a leading '&' throughout this file.
# Several of them (and, xor, sub, push, pop) share their names with Perl
# operators/built-ins, so the sigil is required there and is used everywhere
# for uniformity.

push(@INC,"perlasm","../../perlasm");	# search paths for x86asm.pl
require "x86asm.pl";

# First command-line argument and the script name are handed to the emitter.
# NOTE(review): their exact meaning is defined by x86asm.pl -- confirm there.
&asm_init($ARGV[0],$0);

# Each generator receives the symbol name of the routine it should emit.
&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
#&bn_sub_part_words("bn_sub_part_words");	# generator is defined below but currently not emitted

&asm_finish();

# bn_mul_add_words($name)
#
# Emits the i386 routine $name: a word-array multiply-accumulate.
# Arguments of the generated routine, as read below:
#   wparam(0) = r (destination words), wparam(1) = a (source words),
#   wparam(2) = num (word count),      wparam(3) = w (multiplier word).
# For every word: t = a[i]*w + carry + r[i]; r[i] = low(t); carry = high(t).
# The final carry word is moved into eax just before function_end.
#
# Layout: a main loop unrolled 8 words per pass while (num & ~7) != 0, then
# a tail of up to 7 single-word rounds for (num & 7).
sub bn_mul_add_words
	{
	my ($name)=@_;

	&function_begin($name,"");

	&comment("");
	my $ap="ebx";		# pointer into a
	my $w="ebp";		# multiplier word
	my $rp="edi";		# pointer into r
	my $carry="esi";	# running carry word

	&xor($carry,$carry);	# carry = 0
	&mov($rp,&wparam(0));	# r

	&mov("ecx",&wparam(2));	# num
	&mov($ap,&wparam(1));	# a

	&and("ecx",0xfffffff8);	# num with the low 3 bits cleared (whole groups of 8)
	&mov($w,&wparam(3));	# w

	&push("ecx");		# reserve one stack slot, reached below through swtmp(0)

	# Tests the zero flag produced by the 'and' above; x86 mov and push
	# do not modify flags.
	&jz(&label("maw_finish"));

	&set_label("maw_loop",0);

	&mov(&swtmp(0),"ecx");	# spill the counter; mul below overwrites eax/edx only, but ecx is reloaded after the pass

	for (my $off=0; $off<32; $off+=4)	# $off is a byte offset: 8 words per pass
		{
		&comment("Round $off");

		 &mov("eax",&DWP($off,$ap,"",0)); 	# *a
		&mul($w);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t)+=c
		 &mov($carry,&DWP($off,$rp,"",0));	# reuse the carry register to fetch *r
		&adc("edx",0);				# H(t)+=carry
		 &add("eax",$carry);			# L(t)+= *r
		&adc("edx",0);				# H(t)+=carry
		 &mov(&DWP($off,$rp,"",0),"eax");	# *r= L(t);
		&mov($carry,"edx");			# c=  H(t);
		}

	&comment("");
	&mov("ecx",&swtmp(0));	# reload the counter
	&add($ap,32);
	&add($rp,32);
	&sub("ecx",8);
	&jnz(&label("maw_loop"));

	&set_label("maw_finish",0);
	&mov("ecx",&wparam(2));	# get num
	&and("ecx",7);		# leftover words
	&jnz(&label("maw_finish2"));	# helps branch prediction
	&jmp(&label("maw_end"));

	# NOTE(review): second set_label argument is 1 only here; presumably an
	# alignment request -- confirm against x86asm.pl.
	&set_label("maw_finish2",1);
	for my $i (0 .. 6)
		{
		&comment("Tail Round $i");
		 &mov("eax",&DWP($i*4,$ap,"",0));	# *a
		&mul($w);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t)+=c
		 &mov($carry,&DWP($i*4,$rp,"",0));	# fetch *r
		&adc("edx",0);				# H(t)+=carry
		 &add("eax",$carry);			# L(t)+= *r
		&adc("edx",0);				# H(t)+=carry
		 &dec("ecx") if ($i != 6);		# sets ZF for the jz below (the movs in between keep flags)
		&mov(&DWP($i*4,$rp,"",0),"eax");	# *r= L(t);
		 &mov($carry,"edx");			# c=  H(t);
		&jz(&label("maw_end")) if ($i != 6);	# last round falls through to maw_end
		}
	&set_label("maw_end",0);
	&mov("eax",$carry);	# leave the final carry in eax

	&pop("ecx");	# discard the temporary stack slot pushed above

	&function_end($name);
	}

# bn_mul_words($name)
#
# Emits the i386 routine $name: multiply a word array by a single word.
# Arguments of the generated routine, as read below:
#   wparam(0) = r (destination words), wparam(1) = a (source words),
#   wparam(2) = num (word count),      wparam(3) = w (multiplier word).
# For every word: t = a[i]*w + carry; r[i] = low(t); carry = high(t).
# The final carry word is moved into eax just before function_end.
#
# Layout: a main loop unrolled 8 words per pass while (num & ~7) != 0, then
# a tail of up to 7 single-word rounds for (num & 7).
sub bn_mul_words
	{
	my ($name)=@_;

	&function_begin($name,"");

	&comment("");
	my $ap="ebx";		# pointer into a
	my $w="ecx";		# multiplier word
	my $rp="edi";		# pointer into r
	my $carry="esi";	# running carry word
	my $num="ebp";		# remaining word count

	&xor($carry,$carry);	# carry = 0
	&mov($rp,&wparam(0));	# r
	&mov($ap,&wparam(1));	# a
	&mov($num,&wparam(2));	# num
	&mov($w,&wparam(3));	# w

	&and($num,0xfffffff8);	# num with the low 3 bits cleared (whole groups of 8)
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	for (my $off=0; $off<32; $off+=4)	# $off is a byte offset: 8 words per pass
		{
		&comment("Round $off");

		 &mov("eax",&DWP($off,$ap,"",0)); 	# *a
		&mul($w);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t)+=c
		 # XXX

		&adc("edx",0);				# H(t)+=carry
		 &mov(&DWP($off,$rp,"",0),"eax");	# *r= L(t);

		&mov($carry,"edx");			# c=  H(t);
		}

	&comment("");
	&add($ap,32);
	&add($rp,32);
	&sub($num,8);
	&jz(&label("mw_finish"));
	&jmp(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($num,&wparam(2));	# get num
	&and($num,7);		# leftover words
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	# NOTE(review): second set_label argument is 1 only here; presumably an
	# alignment request -- confirm against x86asm.pl.
	&set_label("mw_finish2",1);
	for my $i (0 .. 6)
		{
		&comment("Tail Round $i");
		 &mov("eax",&DWP($i*4,$ap,"",0));	# *a
		&mul($w);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t)+=c
		 # XXX
		&adc("edx",0);				# H(t)+=carry
		 &mov(&DWP($i*4,$rp,"",0),"eax");	# *r= L(t);
		&mov($carry,"edx");			# c=  H(t);
		 &dec($num) if ($i != 6);
		&jz(&label("mw_end")) if ($i != 6);	# last round falls through to mw_end
		}
	&set_label("mw_end",0);
	&mov("eax",$carry);	# leave the final carry in eax

	&function_end($name);
	}

# bn_sqr_words($name)
#
# Emits the i386 routine $name: square every word of an array.
# Arguments of the generated routine, as read below:
#   wparam(0) = r (destination), wparam(1) = a (source words),
#   wparam(2) = num (number of source words).
# Each source word a[i] yields two destination words: r[2i] receives the low
# half and r[2i+1] the high half of a[i]*a[i]. No result register is set
# before function_end.
#
# Layout: a main loop unrolled 8 words per pass while (num & ~7) != 0, then
# a tail of up to 7 single-word rounds for (num & 7).
sub bn_sqr_words
	{
	my ($name)=@_;

	&function_begin($name,"");

	&comment("");
	my $rp="esi";		# pointer into r
	my $ap="edi";		# pointer into a
	my $num="ebx";		# remaining word count

	&mov($rp,&wparam(0));	# r
	&mov($ap,&wparam(1));	# a
	&mov($num,&wparam(2));	# num

	&and($num,0xfffffff8);	# num with the low 3 bits cleared (whole groups of 8)
	&jz(&label("sw_finish"));

	&set_label("sw_loop",0);
	for (my $off=0; $off<32; $off+=4)	# $off is a byte offset into a; r advances twice as fast
		{
		&comment("Round $off");
		&mov("eax",&DWP($off,$ap,"",0)); 	# *a
		 # XXX
		&mul("eax");				# edx:eax = *a * *a
		&mov(&DWP($off*2,$rp,"",0),"eax");	# low half
		 &mov(&DWP($off*2+4,$rp,"",0),"edx");	# high half
		}

	&comment("");
	&add($ap,32);		# 8 source words consumed
	&add($rp,64);		# 16 destination words produced
	&sub($num,8);
	&jnz(&label("sw_loop"));

	&set_label("sw_finish",0);
	&mov($num,&wparam(2));	# get num
	&and($num,7);		# leftover words
	&jz(&label("sw_end"));

	for my $i (0 .. 6)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$ap,"",0));	# *a
		 # XXX
		&mul("eax");				# edx:eax = *a * *a
		&mov(&DWP($i*8,$rp,"",0),"eax");	# low half
		 &dec($num) if ($i != 6);		# sets ZF for the jz below (the mov in between keeps flags)
		&mov(&DWP($i*8+4,$rp,"",0),"edx");	# high half
		 &jz(&label("sw_end")) if ($i != 6);	# last round falls through to sw_end
		}
	&set_label("sw_end",0);

	&function_end($name);
	}

# bn_div_words($name)
#
# Emits the i386 routine $name: one unsigned 64-by-32-bit division.
# Arguments of the generated routine, as read below:
#   wparam(0) -> edx (high word of the dividend),
#   wparam(1) -> eax (low word of the dividend),
#   wparam(2) -> ebx (divisor).
# 'div ebx' leaves the quotient in eax and the remainder in edx.
#
# No guard is emitted: per the x86 DIV definition the instruction raises a
# divide error when the divisor is zero or the quotient does not fit in
# 32 bits, so callers of the generated routine must rule those cases out.
sub bn_div_words
	{
	my ($name)=@_;

	&function_begin($name,"");
	&mov("edx",&wparam(0));	# dividend, high word
	&mov("eax",&wparam(1));	# dividend, low word
	&mov("ebx",&wparam(2));	# divisor
	&div("ebx");		# eax = quotient, edx = remainder
	&function_end($name);
	}

# bn_add_words($name)
#
# Emits the i386 routine $name: word-array addition with carry propagation.
# Arguments of the generated routine, as read below:
#   wparam(0) = r (destination words), wparam(1) = a, wparam(2) = b,
#   wparam(3) = num (word count).
# For every word: r[i] = a[i] + b[i] + carry. The carry lives in eax, so
# the final carry is already in eax when function_end is reached.
#
# Layout: a main loop unrolled 8 words per pass while (num & ~7) != 0, then
# a tail of up to 7 single-word rounds for (num & 7).
sub bn_add_words
	{
	my ($name)=@_;

	&function_begin($name,"");

	&comment("");
	my $ap="esi";		# pointer into a
	my $bp="edi";		# pointer into b
	my $carry="eax";	# carry between words (0 or 1)
	my $rp="ebx";		# pointer into r
	my $t1="ecx";		# scratch: a word, then the sum
	my $t2="edx";		# scratch: b word
	my $num="ebp";		# remaining word count

	&mov($rp,&wparam(0));	# get r
	 &mov($ap,&wparam(1));	# get a
	&mov($bp,&wparam(2));	# get b
	 &mov($num,&wparam(3));	# get num
	&xor($carry,$carry);	# clear carry
	 &and($num,0xfffffff8);	# num with the low 3 bits cleared (whole groups of 8)

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for my $i (0 .. 7)
		{
		&comment("Round $i");

		&mov($t1,&DWP($i*4,$ap,"",0)); 	# *a
		 &mov($t2,&DWP($i*4,$bp,"",0)); 	# *b
		&add($t1,$carry);		# add incoming carry; CF = overflow
		 &mov($carry,0);		# zero the register; mov leaves CF untouched
		&adc($carry,$carry);		# carry = CF of the first add
		 &add($t1,$t2);			# add *b
		&adc($carry,0);			# fold in CF of the second add
		 &mov(&DWP($i*4,$rp,"",0),$t1); 	# *r
		}

	&comment("");
	&add($ap,32);
	 &add($bp,32);
	&add($rp,32);
	 &sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));	# get num
	&and($num,7);		# leftover words
	 &jz(&label("aw_end"));

	for my $i (0 .. 6)
		{
		&comment("Tail Round $i");
		&mov($t1,&DWP($i*4,$ap,"",0));	# *a
		 &mov($t2,&DWP($i*4,$bp,"",0));	# *b
		&add($t1,$carry);
		 &mov($carry,0);
		&adc($carry,$carry);
		 &add($t1,$t2);
		&adc($carry,0);
		 &dec($num) if ($i != 6);	# sets ZF for the jz below (the mov in between keeps flags)
		&mov(&DWP($i*4,$rp,"",0),$t1);	# *r
		 &jz(&label("aw_end")) if ($i != 6);	# last round falls through to aw_end
		}
	&set_label("aw_end",0);

#	&mov("eax",$carry);		# not needed: the carry register is "eax"

	&function_end($name);
	}

# bn_sub_words($name)
#
# Emits the i386 routine $name: word-array subtraction with borrow
# propagation.
# Arguments of the generated routine, as read below:
#   wparam(0) = r (destination words), wparam(1) = a, wparam(2) = b,
#   wparam(3) = num (word count).
# For every word: r[i] = a[i] - b[i] - borrow. The borrow lives in eax, so
# the final borrow is already in eax when function_end is reached.
#
# Layout: a main loop unrolled 8 words per pass while (num & ~7) != 0, then
# a tail of up to 7 single-word rounds for (num & 7). The "aw_" label names
# are the same strings used by bn_add_words.
# NOTE(review): this relies on the emitter scoping labels per function --
# confirm against x86asm.pl.
sub bn_sub_words
	{
	my ($name)=@_;

	&function_begin($name,"");

	&comment("");
	my $ap="esi";		# pointer into a
	my $bp="edi";		# pointer into b
	my $borrow="eax";	# borrow between words (0 or 1)
	my $rp="ebx";		# pointer into r
	my $t1="ecx";		# scratch: a word, then the difference
	my $t2="edx";		# scratch: b word
	my $num="ebp";		# remaining word count

	&mov($rp,&wparam(0));	# get r
	 &mov($ap,&wparam(1));	# get a
	&mov($bp,&wparam(2));	# get b
	 &mov($num,&wparam(3));	# get num
	&xor($borrow,$borrow);	# clear borrow
	 &and($num,0xfffffff8);	# num with the low 3 bits cleared (whole groups of 8)

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for my $i (0 .. 7)
		{
		&comment("Round $i");

		&mov($t1,&DWP($i*4,$ap,"",0)); 	# *a
		 &mov($t2,&DWP($i*4,$bp,"",0)); 	# *b
		&sub($t1,$borrow);		# subtract incoming borrow; CF = borrow out
		 &mov($borrow,0);		# zero the register; mov leaves CF untouched
		&adc($borrow,$borrow);		# borrow = CF of the first sub
		 &sub($t1,$t2);			# subtract *b
		&adc($borrow,0);		# fold in CF of the second sub
		 &mov(&DWP($i*4,$rp,"",0),$t1); 	# *r
		}

	&comment("");
	&add($ap,32);
	 &add($bp,32);
	&add($rp,32);
	 &sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));	# get num
	&and($num,7);		# leftover words
	 &jz(&label("aw_end"));

	for my $i (0 .. 6)
		{
		&comment("Tail Round $i");
		&mov($t1,&DWP($i*4,$ap,"",0));	# *a
		 &mov($t2,&DWP($i*4,$bp,"",0));	# *b
		&sub($t1,$borrow);
		 &mov($borrow,0);
		&adc($borrow,$borrow);
		 &sub($t1,$t2);
		&adc($borrow,0);
		 &dec($num) if ($i != 6);	# sets ZF for the jz below (the mov in between keeps flags)
		&mov(&DWP($i*4,$rp,"",0),$t1);	# *r
		 &jz(&label("aw_end")) if ($i != 6);	# last round falls through to aw_end
		}
	&set_label("aw_end",0);

#	&mov("eax",$borrow);		# not needed: the borrow register is "eax"

	&function_end($name);
	}

# bn_sub_part_words($name)
#
# Emits the i386 routine $name: subtraction of two word arrays of possibly
# different lengths.
# Arguments of the generated routine, as read below:
#   wparam(0) = r, wparam(1) = a, wparam(2) = b,
#   wparam(3) = num (words present in both a and b),
#   wparam(4) = dl  (signed count of extra words; see below).
# The borrow lives in eax, so it is already in eax at function_end.
#
# Phases of the emitted code:
#   1. Common part: r[i] = a[i] - b[i] - borrow for num words (8x unrolled
#      loop plus tail). Unlike bn_sub_words, the tail advances the pointers
#      word by word so they are valid for the next phase.
#   2. dl == 0: done.
#   3. dl <  0 ("pw_neg"): for -dl further words, r[i] = 0 - b[i] - borrow.
#   4. dl >  0 ("pw_pos"): for dl further words, r[i] = a[i] - borrow. As
#      soon as a word produces no borrow the code jumps into the matching
#      position of a plain copy loop ("pw_nc*" labels), which copies the
#      rest of a to r and ends with the borrow register cleared.
#
# The "aw_" label names are the same strings used by bn_add_words and
# bn_sub_words.
# NOTE(review): this relies on the emitter scoping labels per function --
# confirm against x86asm.pl.
sub bn_sub_part_words
	{
	my ($name)=@_;

	&function_begin($name,"");

	&comment("");
	my $ap="esi";		# pointer into a
	my $bp="edi";		# pointer into b
	my $borrow="eax";	# borrow between words (0 or 1)
	my $rp="ebx";		# pointer into r
	my $t1="ecx";		# scratch: minuend / result word
	my $t2="edx";		# scratch: subtrahend word
	my $num="ebp";		# remaining word count of the current phase

	&mov($rp,&wparam(0));	# get r
	 &mov($ap,&wparam(1));	# get a
	&mov($bp,&wparam(2));	# get b
	 &mov($num,&wparam(3));	# get num
	&xor($borrow,$borrow);	# clear borrow
	 &and($num,0xfffffff8);	# num with the low 3 bits cleared (whole groups of 8)

	&jz(&label("aw_finish"));

	# ---- phase 1: common part, 8 words per pass ----
	&set_label("aw_loop",0);
	for my $i (0 .. 7)
		{
		&comment("Round $i");

		&mov($t1,&DWP($i*4,$ap,"",0)); 	# *a
		 &mov($t2,&DWP($i*4,$bp,"",0)); 	# *b
		&sub($t1,$borrow);		# subtract incoming borrow; CF = borrow out
		 &mov($borrow,0);		# zero the register; mov leaves CF untouched
		&adc($borrow,$borrow);		# borrow = CF of the first sub
		 &sub($t1,$t2);			# subtract *b
		&adc($borrow,0);		# fold in CF of the second sub
		 &mov(&DWP($i*4,$rp,"",0),$t1); 	# *r
		}

	&comment("");
	&add($ap,32);
	 &add($bp,32);
	&add($rp,32);
	 &sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));	# get num
	&and($num,7);		# leftover words
	 &jz(&label("aw_end"));

	# ---- phase 1 tail: one word per round, pointers advanced each round ----
	for my $i (0 .. 6)
		{
		&comment("Tail Round $i");
		&mov($t1,&DWP(0,$ap,"",0));	# *a
		 &mov($t2,&DWP(0,$bp,"",0));	# *b
		&sub($t1,$borrow);
		 &mov($borrow,0);
		&adc($borrow,$borrow);
		 &sub($t1,$t2);
		&adc($borrow,0);		# borrow is captured before the adds below change the flags
		&mov(&DWP(0,$rp,"",0),$t1);	# *r
		&add($ap, 4);
		&add($bp, 4);
		&add($rp, 4);
		 &dec($num) if ($i != 6);
		 &jz(&label("aw_end")) if ($i != 6);	# last round falls through to aw_end
		}
	&set_label("aw_end",0);

	# ---- dispatch on dl ----
	&cmp(&wparam(4),0);
	&je(&label("pw_end"));

	&mov($num,&wparam(4));	# get dl
	&cmp($num,0);
	&je(&label("pw_end"));
	&jge(&label("pw_pos"));

	# ---- phase 3: dl < 0, r = 0 - b - borrow for -dl words ----
	&comment("pw_neg");
	&mov($t2,0);
	&sub($t2,$num);
	&mov($num,$t2);		# num = -dl
	&and($num,0xfffffff8);	# -dl with the low 3 bits cleared
	&jz(&label("pw_neg_finish"));

	&set_label("pw_neg_loop",0);
	for my $i (0 .. 7)
	{
	    &comment("dl<0 Round $i");

	    &mov($t1,0);			# minuend is zero
	    &mov($t2,&DWP($i*4,$bp,"",0)); 	# *b
	    &sub($t1,$borrow);
	    &mov($borrow,0);
	    &adc($borrow,$borrow);
	    &sub($t1,$t2);
	    &adc($borrow,0);
	    &mov(&DWP($i*4,$rp,"",0),$t1); 	# *r
	}

	&comment("");
	&add($bp,32);
	&add($rp,32);
	&sub($num,8);
	&jnz(&label("pw_neg_loop"));

	&set_label("pw_neg_finish",0);
	&mov($t2,&wparam(4));	# get dl
	&mov($num,0);
	&sub($num,$t2);		# num = -dl
	&and($num,7);		# leftover words
	&jz(&label("pw_end"));

	for my $i (0 .. 6)
	{
	    &comment("dl<0 Tail Round $i");
	    &mov($t1,0);
	    &mov($t2,&DWP($i*4,$bp,"",0));	# *b
	    &sub($t1,$borrow);
	    &mov($borrow,0);
	    &adc($borrow,$borrow);
	    &sub($t1,$t2);
	    &adc($borrow,0);
	    &dec($num) if ($i != 6);		# sets ZF for the jz below (the mov in between keeps flags)
	    &mov(&DWP($i*4,$rp,"",0),$t1);	# *r
	    &jz(&label("pw_end")) if ($i != 6);
	}

	&jmp(&label("pw_end"));

	# ---- phase 4: dl > 0, r = a - borrow for dl words ----
	&set_label("pw_pos",0);

	&and($num,0xfffffff8);	# dl with the low 3 bits cleared
	&jz(&label("pw_pos_finish"));

	&set_label("pw_pos_loop",0);

	for my $i (0 .. 7)
	{
	    &comment("dl>0 Round $i");

	    &mov($t1,&DWP($i*4,$ap,"",0));	# *a
	    &sub($t1,$borrow);
	    &mov(&DWP($i*4,$rp,"",0),$t1);	# *r
	    &jnc(&label("pw_nc".$i));		# no borrow out: continue in the copy loop after word $i
	}

	&comment("");
	&add($ap,32);
	&add($rp,32);
	&sub($num,8);
	&jnz(&label("pw_pos_loop"));

	&set_label("pw_pos_finish",0);
	&mov($num,&wparam(4));	# get dl
	&and($num,7);		# leftover words
	&jz(&label("pw_end"));

	for my $i (0 .. 6)
	{
	    &comment("dl>0 Tail Round $i");
	    &mov($t1,&DWP($i*4,$ap,"",0));	# *a
	    &sub($t1,$borrow);
	    &mov(&DWP($i*4,$rp,"",0),$t1);	# *r
	    &jnc(&label("pw_tail_nc".$i));	# no borrow out: continue in the copy tail after word $i
	    &dec($num) if ($i != 6);
	    &jz(&label("pw_end")) if ($i != 6);
	}
	&mov($borrow,1);	# reached only when the seventh tail word still borrowed
	&jmp(&label("pw_end"));

	# ---- phase 4 copy path: borrow has cleared, r = a for the rest ----
	# Each pw_nc<i> label sits after the copy of word i, so a jump from
	# round i of pw_pos_loop resumes with word i+1.
	&set_label("pw_nc_loop",0);
	for my $i (0 .. 7)
	{
	    &mov($t1,&DWP($i*4,$ap,"",0));	# *a
	    &mov(&DWP($i*4,$rp,"",0),$t1);	# *r
	    &set_label("pw_nc".$i,0);
	}

	&comment("");
	&add($ap,32);
	&add($rp,32);
	&sub($num,8);
	&jnz(&label("pw_nc_loop"));

	&mov($num,&wparam(4));	# get dl
	&and($num,7);		# leftover words
	&jz(&label("pw_nc_end"));

	for my $i (0 .. 6)
	{
	    &mov($t1,&DWP($i*4,$ap,"",0));	# *a
	    &mov(&DWP($i*4,$rp,"",0),$t1);	# *r
	    &set_label("pw_tail_nc".$i,0);
	    &dec($num) if ($i != 6);
	    &jz(&label("pw_nc_end")) if ($i != 6);
	}

	&set_label("pw_nc_end",0);
	&mov($borrow,0);	# copy path always ends without a borrow

	&set_label("pw_end",0);

#	&mov("eax",$borrow);		# not needed: the borrow register is "eax"

	&function_end($name);
	}
