#!/usr/local/bin/perl
#
# bn-586.pl revision 55714
#
# Generator script: loads the perlasm x86 helper library and emits the
# bignum word routines defined further down in this file, in the order
# listed here.
#
# $ARGV[0] and $0 are handed to asm_init() unchanged; what asm_init() does
# with them is defined in x86asm.pl, not here.

push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";

&asm_init($ARGV[0],$0);

# Each generator takes the name to give the routine it emits.
&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
&bn_sub_words("bn_sub_words");

&asm_finish();

# bn_mul_add_words($name)
#
# Emit an x86 routine called $name using the perlasm helpers from x86asm.pl.
# The generated code fetches its arguments with wparam(0..3) and treats them
# as r, a, num and w.  For each of the num words it forms
# a[i]*w + r[i] + carry, writes the low half of the result back to r[i] and
# carries the high half into the next word.  The last carry is moved to eax
# just before the epilogue.
#
# The work is emitted as an 8-way unrolled loop covering num & ~7 words,
# followed by an unrolled tail of up to 7 rounds for the remaining num & 7.
#
# The order of the emitted instructions is kept exactly as it was.  A single
# leading space before a helper call is this file's existing marker on
# alternate instructions (presumably instruction pairing -- confirm before
# reordering anything).
#
# The helpers are called with '&' on purpose: several of them (and, xor, sub,
# push, pop) share their names with Perl built-ins or keywords.
sub bn_mul_add_words
	{
	my($name)=@_;

	# Register roles inside the generated routine.  Lexicals, so that one
	# generator never sees register names chosen by another.
	my $a_ptr="ebx";	# walks a[]
	my $w_reg="ebp";	# multiplier word
	my $r_ptr="edi";	# walks r[] (read and written)
	my $carry="esi";	# running carry word

	&function_begin($name,"");

	&comment("");

	&xor($carry,$carry);		# carry = 0
	&mov($r_ptr,&wparam(0));	# r

	&mov("ecx",&wparam(2));		# num
	&mov($a_ptr,&wparam(1));	# a

	&and("ecx",0xfffffff8);		# words handled by the unrolled loop
	&mov($w_reg,&wparam(3));	# w

	&push("ecx");			# make room for the swtmp(0) scratch word

	# mov and push do not touch the flags, so this still tests the and above.
	&jz(&label("maw_finish"));

	&set_label("maw_loop",0);

	&mov(&swtmp(0),"ecx");		# park the remaining count

	foreach my $word (0..7)
		{
		my $off=$word*4;	# byte offset of this word
		&comment("Round $off");

		 &mov("eax",&DWP($off,$a_ptr,"",0));	# *a
		&mul($w_reg);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t)+=c
		 &mov($carry,&DWP($off,$r_ptr,"",0));	# fetch *r
		&adc("edx",0);				# H(t)+=carry
		 &add("eax",$carry);			# L(t)+= *r
		&adc("edx",0);				# H(t)+=carry
		 &mov(&DWP($off,$r_ptr,"",0),"eax");	# *r= L(t);
		&mov($carry,"edx");			# c=  H(t);
		}

	&comment("");
	&mov("ecx",&swtmp(0));		# reload the remaining count
	&add($a_ptr,32);		# advance both pointers by 8 words
	&add($r_ptr,32);
	&sub("ecx",8);
	&jnz(&label("maw_loop"));

	&set_label("maw_finish",0);
	&mov("ecx",&wparam(2));		# get num
	&and("ecx",7);			# leftover words
	&jnz(&label("maw_finish2"));	# helps branch prediction
	&jmp(&label("maw_end"));

	&set_label("maw_finish2",1);
	foreach my $word (0..6)
		{
		my $off=$word*4;
		my $more=($word != 6);	# the last round needs no count check
		&comment("Tail Round $word");
		 &mov("eax",&DWP($off,$a_ptr,"",0));	# *a
		&mul($w_reg);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t)+=c
		 &mov($carry,&DWP($off,$r_ptr,"",0));	# fetch *r
		&adc("edx",0);				# H(t)+=carry
		 &add("eax",$carry);			# L(t)+= *r
		&adc("edx",0);				# H(t)+=carry
		 &dec("ecx") if ($more);		# one leftover word done
		&mov(&DWP($off,$r_ptr,"",0),"eax");	# *r= L(t);
		 &mov($carry,"edx");			# c=  H(t);
		# The two movs above leave the flags from dec intact.
		&jz(&label("maw_end")) if ($more);
		}
	&set_label("maw_end",0);
	&mov("eax",$carry);		# hand the final carry back in eax

	&pop("ecx");			# drop the scratch word pushed above

	&function_end($name);
	}

# bn_mul_words($name)
#
# Emit an x86 routine called $name using the perlasm helpers from x86asm.pl.
# The generated code fetches its arguments with wparam(0..3) and treats them
# as r, a, num and w.  For each of the num words it forms a[i]*w + carry,
# stores the low half to r[i] and carries the high half into the next word.
# The last carry is moved to eax before the epilogue.
#
# Layout: an 8-way unrolled loop for num & ~7 words, then an unrolled tail of
# up to 7 rounds for the remaining num & 7 words.
#
# The order of the emitted instructions is kept exactly as it was.  A single
# leading space before a helper call is this file's existing marker on
# alternate instructions (presumably instruction pairing -- confirm before
# reordering anything).
#
# The helpers are called with '&' on purpose: several of them (and, sub, xor)
# share their names with Perl built-ins or keywords.
sub bn_mul_words
	{
	my($name)=@_;

	# Register roles inside the generated routine (lexical to this generator).
	my $a_ptr="ebx";	# walks a[]
	my $w_reg="ecx";	# multiplier word
	my $r_ptr="edi";	# walks r[]
	my $carry="esi";	# running carry word
	my $count="ebp";	# words still to do

	&function_begin($name,"");

	&comment("");

	&xor($carry,$carry);		# carry = 0
	&mov($r_ptr,&wparam(0));	# r
	&mov($a_ptr,&wparam(1));	# a
	&mov($count,&wparam(2));	# num
	&mov($w_reg,&wparam(3));	# w

	&and($count,0xfffffff8);	# words handled by the unrolled loop
	&jz(&label("mw_finish"));

	&set_label("mw_loop",0);
	foreach my $word (0..7)
		{
		my $off=$word*4;	# byte offset of this word
		&comment("Round $off");

		 &mov("eax",&DWP($off,$a_ptr,"",0));	# *a
		&mul($w_reg);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t)+=c
		 # XXX

		&adc("edx",0);				# H(t)+=carry
		 &mov(&DWP($off,$r_ptr,"",0),"eax");	# *r= L(t);

		&mov($carry,"edx");			# c=  H(t);
		}

	&comment("");
	&add($a_ptr,32);		# advance both pointers by 8 words
	&add($r_ptr,32);
	&sub($count,8);
	&jz(&label("mw_finish"));
	&jmp(&label("mw_loop"));

	&set_label("mw_finish",0);
	&mov($count,&wparam(2));	# get num
	&and($count,7);			# leftover words
	&jnz(&label("mw_finish2"));
	&jmp(&label("mw_end"));

	&set_label("mw_finish2",1);
	foreach my $word (0..6)
		{
		my $off=$word*4;
		my $more=($word != 6);	# the last round needs no count check
		&comment("Tail Round $word");
		 &mov("eax",&DWP($off,$a_ptr,"",0));	# *a
		&mul($w_reg);				# edx:eax = *a * w
		&add("eax",$carry);			# L(t)+=c
		 # XXX
		&adc("edx",0);				# H(t)+=carry
		 &mov(&DWP($off,$r_ptr,"",0),"eax");	# *r= L(t);
		&mov($carry,"edx");			# c=  H(t);
		 &dec($count) if ($more);		# one leftover word done
		&jz(&label("mw_end")) if ($more);
		}
	&set_label("mw_end",0);
	&mov("eax",$carry);		# hand the final carry back in eax

	&function_end($name);
	}

# bn_sqr_words($name)
#
# Emit an x86 routine called $name using the perlasm helpers from x86asm.pl.
# The generated code fetches its arguments with wparam(0..2) and treats them
# as r, a and num.  Each input word a[i] is multiplied by itself and the
# double-width product is stored as two words at r[2*i] (low half) and
# r[2*i+1] (high half).  Nothing is carried between words.
#
# Layout: an 8-way unrolled loop for num & ~7 words (a advances 32 bytes,
# r advances 64), then an unrolled tail of up to 7 rounds for num & 7.
#
# The order of the emitted instructions is kept exactly as it was.
#
# The helpers are called with '&' on purpose: several of them (and, sub)
# share their names with Perl built-ins or keywords.
sub bn_sqr_words
	{
	my($name)=@_;

	# Register roles inside the generated routine (lexical to this generator).
	my $r_ptr="esi";	# walks r[], two words per input word
	my $a_ptr="edi";	# walks a[]
	my $count="ebx";	# words still to do

	&function_begin($name,"");

	&comment("");

	&mov($r_ptr,&wparam(0));	# r
	&mov($a_ptr,&wparam(1));	# a
	&mov($count,&wparam(2));	# num

	&and($count,0xfffffff8);	# words handled by the unrolled loop
	&jz(&label("sw_finish"));

	&set_label("sw_loop",0);
	foreach my $word (0..7)
		{
		my $src=$word*4;	# byte offset into a[]
		my $dst=$src*2;		# byte offset into r[]
		&comment("Round $src");
		&mov("eax",&DWP($src,$a_ptr,"",0));	# *a
		 # XXX
		&mul("eax");				# edx:eax = *a * *a
		&mov(&DWP($dst,$r_ptr,"",0),"eax");	# low half
		 &mov(&DWP($dst+4,$r_ptr,"",0),"edx");	# high half
		}

	&comment("");
	&add($a_ptr,32);		# 8 input words
	&add($r_ptr,64);		# 16 output words
	&sub($count,8);
	&jnz(&label("sw_loop"));

	&set_label("sw_finish",0);
	&mov($count,&wparam(2));	# get num
	&and($count,7);			# leftover words
	&jz(&label("sw_end"));

	foreach my $word (0..6)
		{
		my $src=$word*4;
		my $dst=$word*8;
		my $more=($word != 6);	# the last round needs no count check
		&comment("Tail Round $word");
		&mov("eax",&DWP($src,$a_ptr,"",0));	# *a
		 # XXX
		&mul("eax");				# edx:eax = *a * *a
		&mov(&DWP($dst,$r_ptr,"",0),"eax");	# low half
		 &dec($count) if ($more);		# one leftover word done
		&mov(&DWP($dst+4,$r_ptr,"",0),"edx");	# high half; flags untouched
		 &jz(&label("sw_end")) if ($more);
		}
	&set_label("sw_end",0);

	&function_end($name);
	}

# bn_div_words($name)
#
# Emit an x86 routine called $name using the perlasm helpers from x86asm.pl.
# The generated code loads wparam(0) into edx, wparam(1) into eax and
# wparam(2) into ebx, then issues a single "div ebx".  On x86 that divides
# the double-width value edx:eax by ebx, leaving the quotient in eax and the
# remainder in edx.
#
# No guard is emitted: x86 DIV raises a divide error when the divisor is zero
# or the quotient does not fit in eax, so callers of the generated routine
# must rule those cases out themselves.
sub bn_div_words
	{
	my($name)=@_;

	&function_begin($name,"");
	&mov("edx",&wparam(0));	# high word of the dividend
	&mov("eax",&wparam(1));	# low word of the dividend
	&mov("ebx",&wparam(2));	# divisor
	&div("ebx");		# eax = quotient, edx = remainder
	&function_end($name);
	}

# bn_add_words($name)
#
# Emit an x86 routine called $name using the perlasm helpers from x86asm.pl.
# The generated code fetches its arguments with wparam(0..3) and treats them
# as r, a, b and num.  For each of the num words it stores
# a[i] + b[i] + carry into r[i].  The carry lives in eax, so the final carry
# is already in eax when the epilogue runs and no extra move is emitted.
#
# Carry handling per word: the incoming carry is added to a[i] first and the
# carry flag of that add is captured with "mov c,0 / adc c,c"; b[i] is then
# added and the second carry flag is folded in with "adc c,0".
#
# Layout: an 8-way unrolled loop for num & ~7 words, then an unrolled tail of
# up to 7 rounds for the remaining num & 7 words.
#
# The order of the emitted instructions is kept exactly as it was.  A single
# leading space before a helper call is this file's existing marker on
# alternate instructions (presumably instruction pairing -- confirm before
# reordering anything).
#
# The helpers are called with '&' on purpose: several of them (and, sub, xor)
# share their names with Perl built-ins or keywords.
sub bn_add_words
	{
	my($name)=@_;

	# Register roles inside the generated routine (lexical to this generator).
	my $a_ptr="esi";	# walks a[]
	my $b_ptr="edi";	# walks b[]
	my $carry="eax";	# running carry, doubles as the result register
	my $r_ptr="ebx";	# walks r[]
	my $sum="ecx";		# a[i], then the partial and final sum
	my $addend="edx";	# b[i]
	my $count="ebp";	# words still to do

	&function_begin($name,"");

	&comment("");

	&mov($r_ptr,&wparam(0));	# get r
	 &mov($a_ptr,&wparam(1));	# get a
	&mov($b_ptr,&wparam(2));	# get b
	 &mov($count,&wparam(3));	# get num
	&xor($carry,$carry);		# clear carry
	 &and($count,0xfffffff8);	# words handled by the unrolled loop

	&jz(&label("aw_finish"));

	&set_label("aw_loop",0);
	foreach my $word (0..7)
		{
		my $off=$word*4;	# byte offset of this word
		&comment("Round $word");

		&mov($sum,&DWP($off,$a_ptr,"",0));	# *a
		 &mov($addend,&DWP($off,$b_ptr,"",0));	# *b
		&add($sum,$carry);			# *a + carry in
		 &mov($carry,0);			# mov keeps the flags
		&adc($carry,$carry);			# carry = CF of that add
		 &add($sum,$addend);			# + *b
		&adc($carry,0);				# fold in the second CF
		 &mov(&DWP($off,$r_ptr,"",0),$sum);	# *r
		}

	&comment("");
	&add($a_ptr,32);		# advance all three pointers by 8 words
	 &add($b_ptr,32);
	&add($r_ptr,32);
	 &sub($count,8);
	&jnz(&label("aw_loop"));

	&set_label("aw_finish",0);
	&mov($count,&wparam(3));	# get num
	&and($count,7);			# leftover words
	 &jz(&label("aw_end"));

	foreach my $word (0..6)
		{
		my $off=$word*4;
		my $more=($word != 6);	# the last round needs no count check
		&comment("Tail Round $word");
		&mov($sum,&DWP($off,$a_ptr,"",0));	# *a
		 &mov($addend,&DWP($off,$b_ptr,"",0));	# *b
		&add($sum,$carry);			# *a + carry in
		 &mov($carry,0);			# mov keeps the flags
		&adc($carry,$carry);			# carry = CF of that add
		 &add($sum,$addend);			# + *b
		&adc($carry,0);				# fold in the second CF
		 &dec($count) if ($more);		# one leftover word done
		&mov(&DWP($off,$r_ptr,"",0),$sum);	# *r; flags untouched
		 &jz(&label("aw_end")) if ($more);
		}
	&set_label("aw_end",0);

#	&mov("eax",$carry);		# not needed: $carry is "eax"

	&function_end($name);
	}

# bn_sub_words($name)
#
# Emit an x86 routine called $name using the perlasm helpers from x86asm.pl.
# The generated code fetches its arguments with wparam(0..3) and treats them
# as r, a, b and num.  For each of the num words it stores
# a[i] - b[i] - borrow into r[i].  The borrow lives in eax, so the final
# borrow is already in eax when the epilogue runs and no extra move is
# emitted.
#
# Borrow handling per word: the incoming borrow is subtracted from a[i] first
# and the carry flag of that sub is captured with "mov c,0 / adc c,c"; b[i]
# is then subtracted and the second carry flag is folded in with "adc c,0".
#
# Layout: an 8-way unrolled loop for num & ~7 words, then an unrolled tail of
# up to 7 rounds for the remaining num & 7 words.
#
# Labels use their own "sbw_" prefix rather than reusing the "aw_" names of
# bn_add_words, so the two generators cannot collide whatever label scoping
# the backend applies.
#
# The order of the emitted instructions is kept exactly as it was.  A single
# leading space before a helper call is this file's existing marker on
# alternate instructions (presumably instruction pairing -- confirm before
# reordering anything).
#
# The helpers are called with '&' on purpose: several of them (and, sub, xor)
# share their names with Perl built-ins or keywords.
sub bn_sub_words
	{
	my($name)=@_;

	# Register roles inside the generated routine (lexical to this generator).
	my $a_ptr="esi";	# walks a[]
	my $b_ptr="edi";	# walks b[]
	my $borrow="eax";	# running borrow, doubles as the result register
	my $r_ptr="ebx";	# walks r[]
	my $diff="ecx";		# a[i], then the partial and final difference
	my $subtrahend="edx";	# b[i]
	my $count="ebp";	# words still to do

	&function_begin($name,"");

	&comment("");

	&mov($r_ptr,&wparam(0));	# get r
	 &mov($a_ptr,&wparam(1));	# get a
	&mov($b_ptr,&wparam(2));	# get b
	 &mov($count,&wparam(3));	# get num
	&xor($borrow,$borrow);		# clear borrow
	 &and($count,0xfffffff8);	# words handled by the unrolled loop

	&jz(&label("sbw_finish"));

	&set_label("sbw_loop",0);
	foreach my $word (0..7)
		{
		my $off=$word*4;	# byte offset of this word
		&comment("Round $word");

		&mov($diff,&DWP($off,$a_ptr,"",0));		# *a
		 &mov($subtrahend,&DWP($off,$b_ptr,"",0));	# *b
		&sub($diff,$borrow);				# *a - borrow in
		 &mov($borrow,0);				# mov keeps the flags
		&adc($borrow,$borrow);				# borrow = CF of that sub
		 &sub($diff,$subtrahend);			# - *b
		&adc($borrow,0);				# fold in the second CF
		 &mov(&DWP($off,$r_ptr,"",0),$diff);		# *r
		}

	&comment("");
	&add($a_ptr,32);		# advance all three pointers by 8 words
	 &add($b_ptr,32);
	&add($r_ptr,32);
	 &sub($count,8);
	&jnz(&label("sbw_loop"));

	&set_label("sbw_finish",0);
	&mov($count,&wparam(3));	# get num
	&and($count,7);			# leftover words
	 &jz(&label("sbw_end"));

	foreach my $word (0..6)
		{
		my $off=$word*4;
		my $more=($word != 6);	# the last round needs no count check
		&comment("Tail Round $word");
		&mov($diff,&DWP($off,$a_ptr,"",0));		# *a
		 &mov($subtrahend,&DWP($off,$b_ptr,"",0));	# *b
		&sub($diff,$borrow);				# *a - borrow in
		 &mov($borrow,0);				# mov keeps the flags
		&adc($borrow,$borrow);				# borrow = CF of that sub
		 &sub($diff,$subtrahend);			# - *b
		&adc($borrow,0);				# fold in the second CF
		 &dec($count) if ($more);			# one leftover word done
		&mov(&DWP($off,$r_ptr,"",0),$diff);		# *r; flags untouched
		 &jz(&label("sbw_end")) if ($more);
		}
	&set_label("sbw_end",0);

#	&mov("eax",$borrow);		# not needed: $borrow is "eax"

	&function_end($name);
	}
