#!/usr/local/bin/perl
# alpha assembler

# bn_mul_comba8($name)
#
# Emits, through the assembler-generator helpers defined elsewhere in the
# project (&ld, &st, &mul, &muh, &mul_add_c, &NR, &FR, ...), a routine called
# $name that reads eight words from each of its 2nd and 3rd parameters
# (a[0..7], b[0..7]) and stores sixteen result words through its 1st
# parameter (r[0..15]).
#
# The products are visited column by column: column k holds every a[i]*b[j]
# with i+j == k.  Each product is handed to &mul_add_c together with the
# three registers $c0,$c1,$c2; after a column is finished $c0 is stored to
# r[k] and the three registers are rotated.
# NOTE(review): &mul_add_c is presumably "(c2:c1:c0) += a*b" and &mul/&muh
# presumably yield the low/high half of the product - confirm against the
# helper definitions.
#
# Parameters:
#   $name - name of the generated function (passed to &function_begin and
#           &function_end).
#
# Side effects: sets the package globals $cnt, $rp, $ap and $bp.
#
# The variables are deliberately kept as `local' (not `my'): the helpers live
# outside this file and may rely on dynamic scoping.
sub bn_mul_comba8
	{
	local($name)=@_;
	local(@a,@b,$r,$c0,$c1,$c2);

	$cnt=1;
	&init_pool(3);

	$rp=&wparam(0);
	$ap=&wparam(1);
	$bp=&wparam(2);

	&function_begin($name,"");

	&comment("");

	&stack_push(2);

	# Load a[i] and b[i] from word offset i of their arrays.  The offset
	# is derived from the loop index so that every word is fetched from
	# its own slot (the previous hand-unrolled version read words 4..7
	# from offset 1).
	for my $i (0 .. 7)
		{
		&ld(($a[$i])=&NR(1),&QWPw($i,$ap));
		&FR($ap) if $i == 7;	# last use of the a pointer
		&ld(($b[$i])=&NR(1),&QWPw($i,$bp));
		&FR($bp) if $i == 7;	# last use of the b pointer

		if ($i == 0)
			{
			# Save $reg_s0/$reg_s1 in the two stack slots and
			# release them with &FR.
			# NOTE(review): presumably this makes them available
			# to later &NR calls - confirm.
			&st($reg_s0,&swtmp(0)); &FR($reg_s0);
			&st($reg_s1,&swtmp(1)); &FR($reg_s1);
			}
		}

	# Column 0: a[0]*b[0] goes straight into $c0/$c1 via &mul/&muh.
	($c0,$c1,$c2)=&NR(3);
	&mov("zero",$c2);
	&mul($a[0],$b[0],$c0);
	&muh($a[0],$b[0],$c1);
	&st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
	($c0,$c1,$c2)=($c1,$c2,$c0);
	&mov("zero",$c2);

	# Columns 1..13: accumulate every a[i]*b[k-i], store r[k], rotate.
	for my $k (1 .. 13)
		{
		my $lo = $k > 7 ? $k - 7 : 0;
		my $hi = $k < 7 ? $k : 7;

		for my $i ($lo .. $hi)
			{
			my $j = $k - $i;

			&mul_add_c($a[$i],$b[$j],$c0,$c1,$c2);
			# a[i] is used for the last time when paired with
			# b[7], and b[j] when paired with a[7].
			&FR($a[$i]) if $j == 7;
			&FR($b[$j]) if $i == 7;
			}

		# Store the finished word, then rotate: the freshly
		# allocated register becomes the new, zeroed $c2.
		&st($c0,&QWPw($k,$rp)); &FR($c0); ($c0)=&NR(1);
		($c0,$c1,$c2)=($c1,$c2,$c0);
		&mov("zero",$c2);
		}

	# Column 14: only a[7]*b[7] is left; $c0 and $c1 are stored as the
	# two top result words.
	&mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
	&st($c0,&QWPw(14,$rp));
	&st($c1,&QWPw(15,$rp));

	&FR($c0,$c1,$c2);

	# Restore $reg_s0/$reg_s1 from the stack slots.
	&ld($reg_s0,&swtmp(0));
	&ld($reg_s1,&swtmp(1));
	&stack_pop(2);

	&function_end($name);

	# Deliberately called as `&fin_pool;' (no parentheses), exactly as
	# before, so the helper still receives this sub's current @_.
	&fin_pool;
	}

1;