#!/usr/local/bin/perl
# bn-586.pl revision 109998
#
# Driver section: loads the x86 assembler back end (x86asm.pl), then calls
# one generator sub per bignum word primitive.  Each generator receives the
# name of the function it has to emit.
#
# $ARGV[0] and $0 are handed to &asm_init unchanged; their meaning is
# defined by x86asm.pl (presumably the output flavour and the generating
# script's name -- confirm against x86asm.pl).

# x86asm.pl is looked up in ./perlasm first, then in ../../perlasm.
push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],$0);

&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");
&bn_sub_part_words("bn_sub_part_words");

&asm_finish();

# bn_mul_add_words(NAME)
#
# Emits an x86 function called NAME that, for each of num words, computes
#	t    = a[i]*w + r[i] + c		(64-bit, in edx:eax)
#	r[i] = low 32 bits of t
#	c    = high 32 bits of t
# starting with c = 0, and leaves the final c in eax.
#
# Arguments of the generated function, in &wparam() order: r, a, num, w.
# The instruction helpers (&mov, &mul, &wparam, &swtmp, &DWP, &label, ...)
# are supplied by x86asm.pl, which is not part of this file.
#
# Generated layout: a main loop unrolled 8 times that handles num & ~7
# words, followed by up to 7 "tail" rounds that handle num & 7 words.
# Statements indented by one extra space are kept from the original layout
# (presumably marking instruction pairing -- confirm before reordering).
sub bn_mul_add_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$Low="eax";		# mul leaves the low  product word here
	$High="edx";		# mul leaves the high product word here
	$a="ebx";
	$w="ebp";
	$r="edi";
	$c="esi";

	&xor($c,$c);		# clear carry
	&mov($r,&wparam(0));	# get r

	&mov("ecx",&wparam(2));	# get num
	&mov($a,&wparam(1));	# get a

	&and("ecx",0xfffffff8);	# num rounded down to a multiple of 8
	&mov($w,&wparam(3));	# get w

	&push("ecx");		# Up the stack for a tmp variable

	# mov and push do not touch the flags, so this still tests the
	# result of the and above.
	&jz(&label("maw_finish"));

	&set_label("maw_loop",0);

	&mov(&swtmp(0),"ecx");	# park the remaining count in the tmp slot

	# $i is the byte offset of the word handled in this round.
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");

		 &mov($Low,&DWP($i,$a,"",0)); 	# *a
		&mul($w);			# *a * w
		&add($Low,$c);			# L(t)+=c
		 &mov($c,&DWP($i,$r,"",0));	# load *r
		&adc($High,0);			# H(t)+=carry
		 &add($Low,$c);			# L(t)+= *r
		&adc($High,0);			# H(t)+=carry
		 &mov(&DWP($i,$r,"",0),$Low);	# *r= L(t);
		&mov($c,$High);			# c=  H(t);
		}

	&comment("");
	&mov("ecx",&swtmp(0));	# reload the remaining count
	&add($a,32);
	&add($r,32);
	&sub("ecx",8);
	&jnz(&label("maw_loop"));

	&set_label("maw_finish",0);
	&mov("ecx",&wparam(2));	# get num
	&and("ecx",7);
	&jnz(&label("maw_finish2"));	# helps branch prediction
	&jmp(&label("maw_end"));

	&set_label("maw_finish2",1);
	# Up to 7 leftover words.  ecx counts them down; the test after the
	# last possible round is omitted because nothing can follow it.
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		 &mov($Low,&DWP($i*4,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add($Low,$c);			# L(t)+=c
		 &mov($c,&DWP($i*4,$r,"",0));	# load *r
		&adc($High,0);			# H(t)+=carry
		 &add($Low,$c);			# L(t)+= *r
		&adc($High,0);			# H(t)+=carry
		 &dec("ecx") if ($i != 7-1);
		&mov(&DWP($i*4,$r,"",0),$Low);	# *r= L(t);
		 &mov($c,$High);		# c=  H(t);
		# The two movs above leave the flags alone, so this jz
		# tests the dec.
		&jz(&label("maw_end")) if ($i != 7-1);
		}
	&set_label("maw_end",0);
	&mov("eax",$c);		# return the final carry word

	&pop("ecx");	# clear variable from the stack

	&function_end($name);
	}

# bn_mul_words(NAME)
#
# Emits an x86 function called NAME that, for each of num words, computes
#	t    = a[i]*w + c		(64-bit, in edx:eax)
#	r[i] = low 32 bits of t
#	c    = high 32 bits of t
# starting with c = 0, and leaves the final c in eax.
#
# Arguments of the generated function, in &wparam() order: r, a, num, w.
# The instruction helpers come from x86asm.pl (not part of this file).
#
# Generated layout: a main loop unrolled 8 times for num & ~7 words, then
# up to 7 tail rounds for num & 7 words.
sub bn_mul_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$Low="eax";		# mul leaves the low  product word here
	$High="edx";		# mul leaves the high product word here
	$a="ebx";
	$w="ecx";
	$r="edi";
	$c="esi";
	$num="ebp";

	&xor($c,$c);		# clear carry
	&mov($r,&wparam(0));	# get r
	&mov($a,&wparam(1));	# get a
	&mov($num,&wparam(2));	# get num
	&mov($w,&wparam(3));	# get w

	&and($num,0xfffffff8);	# num rounded down to a multiple of 8
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	# $i is the byte offset of the word handled in this round.
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");

		 &mov($Low,&DWP($i,$a,"",0)); 	# *a
		&mul($w);			# *a * w
		&add($Low,$c);			# L(t)+=c
		 # XXX

		&adc($High,0);			# H(t)+=carry
		 &mov(&DWP($i,$r,"",0),$Low);	# *r= L(t);

		&mov($c,$High);			# c=  H(t);
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	# Loop while words remain and fall through to mw_finish otherwise
	# (one branch per iteration, the same shape the other loops in this
	# file use).
	&jnz(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($num,&wparam(2));	# get num
	&and($num,7);
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	# Up to 7 leftover words.  $num counts them down; the test after the
	# last possible round is omitted because nothing can follow it.
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		 &mov($Low,&DWP($i*4,$a,"",0));	# *a
		&mul($w);			# *a * w
		&add($Low,$c);			# L(t)+=c
		 # XXX
		&adc($High,0);			# H(t)+=carry
		 &mov(&DWP($i*4,$r,"",0),$Low);	# *r= L(t);
		&mov($c,$High);			# c=  H(t);
		 &dec($num) if ($i != 7-1);
		&jz(&label("mw_end")) if ($i != 7-1);
		}
	&set_label("mw_end",0);
	&mov("eax",$c);		# return the final carry word

	&function_end($name);
	}

# bn_sqr_words(NAME)
#
# Emits an x86 function called NAME that squares each of num input words
# and stores the 64-bit result as two output words:
#	r[2*i]   = low  32 bits of a[i]*a[i]
#	r[2*i+1] = high 32 bits of a[i]*a[i]
# The generated code sets no explicit return value.
#
# Arguments of the generated function, in &wparam() order: r, a, num.
# The instruction helpers come from x86asm.pl (not part of this file).
#
# Generated layout: a main loop unrolled 8 times for num & ~7 words, then
# up to 7 tail rounds for num & 7 words.
sub bn_sqr_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$r="esi";
	$a="edi";
	$num="ebx";

	&mov($r,&wparam(0));	# get r
	&mov($a,&wparam(1));	# get a
	&mov($num,&wparam(2));	# get num

	&and($num,0xfffffff8);	# num rounded down to a multiple of 8
	&jz(&label("sw_finish"));

	&set_label("sw_loop",0);
	# $i is the byte offset into a; the output offset is twice that
	# because every input word yields two output words.
	for ($i=0; $i<32; $i+=4)
		{
		&comment("Round $i");
		&mov("eax",&DWP($i,$a,"",0)); 	# *a
		 # XXX
		&mul("eax");			# *a * *a
		&mov(&DWP($i*2,$r,"",0),"eax");	# low word
		 &mov(&DWP($i*2+4,$r,"",0),"edx");# high word
		}

	&comment("");
	&add($a,32);		# 8 input words consumed
	&add($r,64);		# 16 output words produced
	&sub($num,8);
	&jnz(&label("sw_loop"));

	&set_label("sw_finish",0);
	&mov($num,&wparam(2));	# get num
	&and($num,7);
	&jz(&label("sw_end"));

	# Up to 7 leftover words.  $num counts them down; the test after the
	# last possible round is omitted because nothing can follow it.
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov("eax",&DWP($i*4,$a,"",0));	# *a
		 # XXX
		&mul("eax");			# *a * *a
		&mov(&DWP($i*8,$r,"",0),"eax");	# low word
		 &dec($num) if ($i != 7-1);
		&mov(&DWP($i*8+4,$r,"",0),"edx");	# high word (mov keeps the flags of dec)
		 &jz(&label("sw_end")) if ($i != 7-1);
		}
	&set_label("sw_end",0);

	&function_end($name);
	}

# bn_div_words(NAME)
#
# Emits an x86 function called NAME that divides a 64-bit value by a 32-bit
# value with a single div instruction.
#
# Arguments of the generated function, in &wparam() order:
#	0 -> edx  (high word of the dividend)
#	1 -> eax  (low word of the dividend)
#	2 -> ebx  (divisor)
# div leaves the quotient in eax and the remainder in edx.
#
# NOTE: the generated code performs no checks.  On x86, div faults when the
# divisor is zero or when the quotient does not fit in 32 bits (i.e. when
# the high word is not smaller than the divisor), so callers of the
# generated function must rule both cases out.
sub bn_div_words
	{
	local($name)=@_;

	&function_begin($name,"");
	&mov("edx",&wparam(0));	# high word of the dividend
	&mov("eax",&wparam(1));	# low word of the dividend
	&mov("ebx",&wparam(2));	# divisor
	&div("ebx");		# eax = quotient, edx = remainder
	&function_end($name);
	}

# bn_add_words(NAME)
#
# Emits an x86 function called NAME that adds two word arrays with carry
# propagation:
#	r[i] = a[i] + b[i] + c,   c = carry out (0 or 1)
# starting with c = 0.  c lives in eax, so the final carry is what the
# generated function leaves in eax.
#
# Arguments of the generated function, in &wparam() order: r, a, b, num.
# The instruction helpers come from x86asm.pl (not part of this file).
#
# Generated layout: a main loop unrolled 8 times for num & ~7 words, then
# up to 7 tail rounds for num & 7 words.
#
# Carry idiom used in every round:
#	add tmp1,c	; CF = carry of a+c
#	mov c,0		; mov leaves CF alone
#	adc c,c		; c = CF
#	add tmp1,tmp2	; CF = carry of (a+c)+b
#	adc c,0		; c += CF   (the two carries are never both set)
sub bn_add_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));	# get r
	 &mov($a,&wparam(1));	# get a
	&mov($b,&wparam(2));	# get b
	 &mov($num,&wparam(3));	# get num
	&xor($c,$c);		# clear carry
	 &and($num,0xfffffff8);	# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0)); 	# *a
		 &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
		&add($tmp1,$c);				# *a + c
		 &mov($c,0);
		&adc($c,$c);				# c = first carry
		 &add($tmp1,$tmp2);			# + *b
		&adc($c,0);				# c += second carry
		 &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
		}

	&comment("");
	&add($a,32);
	 &add($b,32);
	&add($r,32);
	 &sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));	# get num
	&and($num,7);
	 &jz(&label("aw_end"));

	# Up to 7 leftover words.  $num counts them down; the test after the
	# last possible round is omitted because nothing can follow it.
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
		&add($tmp1,$c);
		 &mov($c,0);
		&adc($c,$c);
		 &add($tmp1,$tmp2);
		&adc($c,0);
		 &dec($num) if ($i != 6);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r (mov keeps the flags of dec)
		 &jz(&label("aw_end")) if ($i != 6);
		}
	&set_label("aw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}

# bn_sub_words(NAME)
#
# Emits an x86 function called NAME that subtracts two word arrays with
# borrow propagation:
#	r[i] = a[i] - b[i] - c,   c = borrow out (0 or 1)
# starting with c = 0.  c lives in eax, so the final borrow is what the
# generated function leaves in eax.
#
# Arguments of the generated function, in &wparam() order: r, a, b, num.
# The instruction helpers come from x86asm.pl (not part of this file).
#
# Generated layout: a main loop unrolled 8 times for num & ~7 words, then
# up to 7 tail rounds for num & 7 words.
#
# Borrow idiom used in every round:
#	sub tmp1,c	; CF = borrow of a-c
#	mov c,0		; mov leaves CF alone
#	adc c,c		; c = CF
#	sub tmp1,tmp2	; CF = borrow of (a-c)-b
#	adc c,0		; c += CF   (the two borrows are never both set)
#
# NOTE(review): the label names (aw_loop, aw_finish, aw_end) are the same
# ones bn_add_words uses.  That only works if x86asm.pl scopes labels per
# generated function -- not visible from this file, confirm before renaming
# or merging.
sub bn_sub_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));	# get r
	 &mov($a,&wparam(1));	# get a
	&mov($b,&wparam(2));	# get b
	 &mov($num,&wparam(3));	# get num
	&xor($c,$c);		# clear borrow
	 &and($num,0xfffffff8);	# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0)); 	# *a
		 &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
		&sub($tmp1,$c);				# *a - c
		 &mov($c,0);
		&adc($c,$c);				# c = first borrow
		 &sub($tmp1,$tmp2);			# - *b
		&adc($c,0);				# c += second borrow
		 &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
		}

	&comment("");
	&add($a,32);
	 &add($b,32);
	&add($r,32);
	 &sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));	# get num
	&and($num,7);
	 &jz(&label("aw_end"));

	# Up to 7 leftover words.  $num counts them down; the test after the
	# last possible round is omitted because nothing can follow it.
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		 &mov($tmp2,&DWP($i*4,$b,"",0));# *b
		&sub($tmp1,$c);
		 &mov($c,0);
		&adc($c,$c);
		 &sub($tmp1,$tmp2);
		&adc($c,0);
		 &dec($num) if ($i != 6);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r (mov keeps the flags of dec)
		 &jz(&label("aw_end")) if ($i != 6);
		}
	&set_label("aw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}

# bn_sub_part_words(NAME)
#
# Emits an x86 function called NAME that subtracts two word arrays of
# possibly different length.  The borrow c lives in eax throughout, so the
# final borrow is what the generated function leaves in eax.
#
# Arguments of the generated function, in &wparam() order:
#	r, a, b, num, dl
# where num is the number of words both inputs have and dl (signed) says
# how many extra words follow: dl<0 means -dl more words of b, dl>0 means
# dl more words of a.
#
# Generated code, in three phases:
#   1. Common part (labels aw_*):  r[i] = a[i] - b[i] - c  for num words.
#      Unlike bn_sub_words, the tail rounds here advance a, b and r by one
#      word each, so that all three pointers sit just past the common part
#      when phase 2 or 3 starts.
#   2. dl<0 (labels pw_neg_*):     r[i] = 0 - b[i] - c     for -dl words.
#   3. dl>0 (labels pw_pos_*):     r[i] = a[i] - c         for dl words.
#      As soon as a round produces no borrow (jnc), control jumps to the
#      matching point of a plain copy loop (labels pw_nc*, pw_tail_nc*),
#      which copies the remaining words of a to r and finishes with c = 0.
#      While phase 3 stays in the subtracting loop c is necessarily 1.
#
# The instruction helpers come from x86asm.pl (not part of this file).
#
# NOTE(review): the aw_* label names are shared with bn_add_words and
# bn_sub_words, which only works if x86asm.pl scopes labels per generated
# function -- not visible from this file, confirm before renaming.
sub bn_sub_part_words
	{
	local($name)=@_;

	&function_begin($name,"");

	&comment("");
	$a="esi";
	$b="edi";
	$c="eax";
	$r="ebx";
	$tmp1="ecx";
	$tmp2="edx";
	$num="ebp";

	&mov($r,&wparam(0));	# get r
	 &mov($a,&wparam(1));	# get a
	&mov($b,&wparam(2));	# get b
	 &mov($num,&wparam(3));	# get num
	&xor($c,$c);		# clear borrow
	 &and($num,0xfffffff8);	# num rounded down to a multiple of 8

	&jz(&label("aw_finish"));

	# ---- phase 1: words present in both a and b ----------------------

	&set_label("aw_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0)); 	# *a
		 &mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
		&sub($tmp1,$c);				# *a - c
		 &mov($c,0);				# mov keeps CF
		&adc($c,$c);				# c = first borrow
		 &sub($tmp1,$tmp2);			# - *b
		&adc($c,0);				# c += second borrow
		 &mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
		}

	&comment("");
	&add($a,32);
	 &add($b,32);
	&add($r,32);
	 &sub($num,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($num,&wparam(3));	# get num
	&and($num,7);
	 &jz(&label("aw_end"));

	# Tail rounds address offset 0 and bump the pointers instead of using
	# $i*4, so a, b and r end up just past the common part.
	for ($i=0; $i<7; $i++)
		{
		&comment("Tail Round $i");
		&mov($tmp1,&DWP(0,$a,"",0));	# *a
		 &mov($tmp2,&DWP(0,$b,"",0));# *b
		&sub($tmp1,$c);
		 &mov($c,0);
		&adc($c,$c);
		 &sub($tmp1,$tmp2);
		&adc($c,0);
		&mov(&DWP(0,$r,"",0),$tmp1);	# *r
		&add($a, 4);
		&add($b, 4);
		&add($r, 4);
		 &dec($num) if ($i != 6);
		 &jz(&label("aw_end")) if ($i != 6);
		}
	&set_label("aw_end",0);

	# ---- dispatch on the sign of dl ----------------------------------
	# je does not change the flags, so jge still sees the result of cmp.

	&mov($num,&wparam(4));	# get dl
	&cmp($num,0);
	&je(&label("pw_end"));	# dl == 0: nothing left to do
	&jge(&label("pw_pos"));

	# ---- phase 2: dl < 0, b has -dl extra words ----------------------

	&comment("pw_neg");
	&mov($tmp2,0);
	&sub($tmp2,$num);
	&mov($num,$tmp2);	# num = -dl
	&and($num,0xfffffff8);	# rounded down to a multiple of 8
	&jz(&label("pw_neg_finish"));

	&set_label("pw_neg_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&comment("dl<0 Round $i");

		&mov($tmp1,0);
		&mov($tmp2,&DWP($i*4,$b,"",0)); 	# *b
		&sub($tmp1,$c);				# 0 - c
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);			# - *b
		&adc($c,0);
		&mov(&DWP($i*4,$r,"",0),$tmp1); 	# *r
		}

	&comment("");
	&add($b,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_neg_loop"));

	&set_label("pw_neg_finish",0);
	&mov($tmp2,&wparam(4));	# get dl
	&mov($num,0);
	&sub($num,$tmp2);	# num = -dl
	&and($num,7);
	&jz(&label("pw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("dl<0 Tail Round $i");
		&mov($tmp1,0);
		&mov($tmp2,&DWP($i*4,$b,"",0));# *b
		&sub($tmp1,$c);
		&mov($c,0);
		&adc($c,$c);
		&sub($tmp1,$tmp2);
		&adc($c,0);
		&dec($num) if ($i != 6);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r (mov keeps the flags of dec)
		&jz(&label("pw_end")) if ($i != 6);
		}

	&jmp(&label("pw_end"));

	# ---- phase 3: dl > 0, a has dl extra words -----------------------

	&set_label("pw_pos",0);

	&and($num,0xfffffff8);	# dl rounded down to a multiple of 8
	&jz(&label("pw_pos_finish"));

	&set_label("pw_pos_loop",0);

	for ($i=0; $i<8; $i++)
		{
		&comment("dl>0 Round $i");

		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&sub($tmp1,$c);			# *a - c
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		# No borrow out: continue in the copy loop right after the
		# copy of word $i.
		&jnc(&label("pw_nc".$i));
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_pos_loop"));

	&set_label("pw_pos_finish",0);
	&mov($num,&wparam(4));	# get dl
	&and($num,7);
	&jz(&label("pw_end"));

	for ($i=0; $i<7; $i++)
		{
		&comment("dl>0 Tail Round $i");
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&sub($tmp1,$c);
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		# The jump lands before the dec of the copy tail, so the
		# count is decremented exactly once for word $i.
		&jnc(&label("pw_tail_nc".$i));
		&dec($num) if ($i != 6);
		&jz(&label("pw_end")) if ($i != 6);
		}
	&mov($c,1);		# every round borrowed
	&jmp(&label("pw_end"));

	# ---- phase 3, borrow cleared: plain copy of the rest of a --------

	&set_label("pw_nc_loop",0);
	for ($i=0; $i<8; $i++)
		{
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&set_label("pw_nc".$i,0);	# entry point from pw_pos_loop
		}

	&comment("");
	&add($a,32);
	&add($r,32);
	&sub($num,8);
	&jnz(&label("pw_nc_loop"));

	&mov($num,&wparam(4));	# get dl
	&and($num,7);
	&jz(&label("pw_nc_end"));

	for ($i=0; $i<7; $i++)
		{
		&mov($tmp1,&DWP($i*4,$a,"",0));	# *a
		&mov(&DWP($i*4,$r,"",0),$tmp1);	# *r
		&set_label("pw_tail_nc".$i,0);	# entry point from the dl>0 tail
		&dec($num) if ($i != 6);
		&jz(&label("pw_nc_end")) if ($i != 6);
		}

	&set_label("pw_nc_end",0);
	&mov($c,0);		# copy path always ends without a borrow

	&set_label("pw_end",0);

#	&mov("eax",$c);		# $c is "eax"

	&function_end($name);
	}
