1#!/usr/local/bin/perl
# alpha assembler
3
# bn_mul_comba8($name)
#
# Emits, through the code-generation helpers (&ld, &st, &mul, &muh,
# &mul_add_c, &NR, &FR, ...) that are defined outside this file, a
# routine called $name that multiplies two 8-word operands column by
# column ("comba" order) and stores the 16 result words.
#
# Parameters of the generated routine, as fetched with &wparam():
#   0 - rp: destination, words 0..15 are written
#   1 - ap: first operand, words 0..7 are read
#   2 - bp: second operand, words 0..7 are read
#
# For every result column k = 0..14 all products a[i]*b[j] with
# i + j == k are handed to the ($c0,$c1,$c2) accumulator, the lowest
# accumulator word is stored to rp[k] and the accumulator is rotated
# down by one word.  The remaining carry word is stored to rp[15].
#
# NOTE(review): &NR/&FR look like register allocate/release helpers, so
# the relative order of all helper calls is significant - confirm with
# the helper definitions before reordering anything in here.
sub bn_mul_comba8
	{
	local($name)=@_;
	# NOTE(review): $r is not used below; it is still localized in case
	# one of the helpers relies on it being masked - TODO confirm.
	local(@a,@b,$r,$c0,$c1,$c2);

	# $cnt, $rp, $ap and $bp are package globals; $cnt is not read in
	# this sub, presumably a helper consumes it - TODO confirm.
	$cnt=1;
	&init_pool(3);

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);

	&function_begin($name,"");

	&comment("");

	&stack_push(2);

	# Load all eight words of both operands.  The word offset handed to
	# &QWPw must be the same as the operand index being loaded.
	for my $i (0 .. 7)
		{
		&ld(($a[$i])=&NR(1),&QWPw($i,$ap));
		&FR($ap) if ($i == 7);	# pointer no longer needed
		&ld(($b[$i])=&NR(1),&QWPw($i,$bp));
		&FR($bp) if ($i == 7);	# pointer no longer needed

		if ($i == 0)
			{
			# Save $reg_s0/$reg_s1 in the two stack slots and
			# release them; they are reloaded before returning.
			&st($reg_s0,&swtmp(0)); &FR($reg_s0);
			&st($reg_s1,&swtmp(1)); &FR($reg_s1);
			}
		}

	($c0,$c1,$c2)=&NR(3);
	&mov("zero",$c2);

	for my $k (0 .. 14)
		{
		if ($k == 0)
			{
			# Nothing has been accumulated yet, so the first
			# product is written straight into $c0/$c1.
			&mul($a[0],$b[0],$c0);
			&muh($a[0],$b[0],$c1);
			}
		else
			{
			my $first=($k > 7)?$k-7:0;
			my $last =($k < 7)?$k:7;

			for my $i ($first .. $last)
				{
				my $j=$k-$i;

				&mul_add_c($a[$i],$b[$j],$c0,$c1,$c2);

				# a[i] is used for the last time when paired
				# with b[7], b[j] when paired with a[7].
				if ($i == 7 && $j == 7)
					{ &FR($a[$i],$b[$j]); }
				elsif ($j == 7)
					{ &FR($a[$i]); }
				elsif ($i == 7)
					{ &FR($b[$j]); }
				}
			}

		# The last column is finished off after the loop.
		last if ($k == 14);

		# Store the completed word, swap its register for a new one
		# and rotate the accumulator down, clearing the new top word.
		&st($c0,&QWPw($k,$rp));
		&FR($c0);
		($c0)=&NR(1);
		($c0,$c1,$c2)=($c1,$c2,$c0);
		&mov("zero",$c2);
		}

	&st($c0,&QWPw(14,$rp));
	&st($c1,&QWPw(15,$rp));

	&FR($c0,$c1,$c2);

	&ld($reg_s0,&swtmp(0));
	&ld($reg_s1,&swtmp(1));
	&stack_pop(2);

	&function_end($name);

	# Bare "&name;" call: the current @_ is passed through unchanged.
	&fin_pool;
	}
176
1771;
178