dnl  sub_n.asm revision 1.1.1.1
dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
dnl  store difference in a third limb vector.

dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


include(`../config.m4')

C INPUT PARAMETERS
define(res_ptr,%o0)
define(s1_ptr,%o1)
define(s2_ptr,%o2)
define(n,%o3)

ASM_START()
PROLOGUE(mpn_sub_n)
C  mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
C                       mp_size_t n)
C
C  Store {s1_ptr,n} - {s2_ptr,n} at {res_ptr,n}; return the borrow (0 or 1)
C  out of the most significant limb in %o0.
C
C  Implementation notes:
C  - The loop-control "addcc n,..." clobbers the condition codes, so the
C    carry (borrow) is saved to a register with "addx %g0,%g0,reg" (reg = C)
C    and re-established with "subcc %g0,reg,%g0" (0 - reg sets C iff reg != 0).
C  - SPARC branches execute their delay-slot instruction, which is why each
C    carry-restore subcc sits immediately after a branch.
C  - Three code paths are chosen from the mutual word alignment of the
C    pointers, so that ldd/std (doubleword, 8-byte-aligned) accesses can be
C    used: V1a pairs res_ptr with s2_ptr, V1b pairs res_ptr with s1_ptr,
C    V2 handles s1_ptr/s2_ptr aligned with each other but not with res_ptr.
	xor	s2_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L(1)			C branch if alignment differs
	nop
C **  V1a  **
	andcc	res_ptr,4,%g0		C res_ptr unaligned? Side effect: cy=0
	be	L(v1)			C if no, branch
	nop
C Subtract least significant limb separately to align res_ptr and s2_ptr
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	n,-1,n
	subcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L(v1):	addx	%g0,%g0,%o4		C save cy in register
	cmp	n,2			C if n < 2 ...
	bl	L(end2)			C ... branch to tail code
	subcc	%g0,%o4,%g0		C restore cy

	ld	[s1_ptr+0],%g4
	addcc	n,-10,n
	ld	[s1_ptr+4],%g1
	ldd	[s2_ptr+0],%g2
	blt	L(fin1)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 8 limbs until less than 8 limbs remain
L(loop1):
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+16],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+20],%g1
	ldd	[s2_ptr+16],%g2
	std	%o4,[res_ptr+8]
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+24],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+28],%g1
	ldd	[s2_ptr+24],%g2
	std	%o4,[res_ptr+16]
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+32],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+36],%g1
	ldd	[s2_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop1)
	subcc	%g0,%o4,%g0		C restore cy

L(fin1):
	addcc	n,8-2,n
	blt	L(end1)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 2 limbs until less than 2 limbs remain
L(loope1):
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope1)
	subcc	%g0,%o4,%g0		C restore cy
L(end1):
	subxcc	%g4,%g2,%o4
	subxcc	%g1,%g3,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register

	andcc	n,1,%g0
	be	L(ret1)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract last limb
	ld	[s1_ptr+8],%g4
	ld	[s2_ptr+8],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[res_ptr+8]

L(ret1):
	retl
	addx	%g0,%g0,%o0		C return carry-out from most sign. limb

L(1):	xor	s1_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L(2)
	nop
C **  V1b  **
	andcc	res_ptr,4,%g0		C res_ptr unaligned? Side effect: cy=0
	be	L(v1b)			C if no, branch
	nop
C Subtract least significant limb separately to align res_ptr and s1_ptr
	ld	[s2_ptr],%g4
	add	s2_ptr,4,s2_ptr
	ld	[s1_ptr],%g2
	add	s1_ptr,4,s1_ptr
	add	n,-1,n
	subcc	%g2,%g4,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L(v1b):	addx	%g0,%g0,%o4		C save cy in register
	cmp	n,2			C if n < 2 ...
	bl	L(end2)			C ... branch to tail code
	subcc	%g0,%o4,%g0		C restore cy

	ld	[s2_ptr+0],%g4
	addcc	n,-10,n
	ld	[s2_ptr+4],%g1
	ldd	[s1_ptr+0],%g2
	blt	L(fin1b)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 8 limbs until less than 8 limbs remain
L(loop1b):
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+12],%g1
	ldd	[s1_ptr+8],%g2
	std	%o4,[res_ptr+0]
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+16],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+20],%g1
	ldd	[s1_ptr+16],%g2
	std	%o4,[res_ptr+8]
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+24],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+28],%g1
	ldd	[s1_ptr+24],%g2
	std	%o4,[res_ptr+16]
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+32],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+36],%g1
	ldd	[s1_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop1b)
	subcc	%g0,%o4,%g0		C restore cy

L(fin1b):
	addcc	n,8-2,n
	blt	L(end1b)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 2 limbs until less than 2 limbs remain
L(loope1b):
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+12],%g1
	ldd	[s1_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope1b)
	subcc	%g0,%o4,%g0		C restore cy
L(end1b):
	subxcc	%g2,%g4,%o4
	subxcc	%g3,%g1,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register

	andcc	n,1,%g0
	be	L(ret1b)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract last limb
	ld	[s2_ptr+8],%g4
	ld	[s1_ptr+8],%g2
	subxcc	%g2,%g4,%o4
	st	%o4,[res_ptr+8]

L(ret1b):
	retl
	addx	%g0,%g0,%o0		C return carry-out from most sign. limb

C **  V2  **
C If we come here, the alignment of s1_ptr and res_ptr as well as the
C alignment of s2_ptr and res_ptr differ.  Since there are only two ways
C things can be aligned (that we care about) we now know that the alignment
C of s1_ptr and s2_ptr are the same.

L(2):	cmp	n,1
	be	L(jone)
	nop
	andcc	s1_ptr,4,%g0		C s1_ptr unaligned? Side effect: cy=0
	be	L(v2)			C if no, branch
	nop
C Subtract least significant limb separately to align s1_ptr and s2_ptr
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	n,-1,n
	subcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

L(v2):	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	blt	L(fin2)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 8 limbs until less than 8 limbs remain
L(loop2):
	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	ldd	[s1_ptr+8],%g2
	ldd	[s2_ptr+8],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+8]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+12]
	ldd	[s1_ptr+16],%g2
	ldd	[s2_ptr+16],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+16]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+20]
	ldd	[s1_ptr+24],%g2
	ldd	[s2_ptr+24],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+24]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+28]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop2)
	subcc	%g0,%o4,%g0		C restore cy

L(fin2):
	addcc	n,8-2,n
	blt	L(end2)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 2 limbs until less than 2 limbs remain
L(loope2):
	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope2)
	subcc	%g0,%o4,%g0		C restore cy
L(end2):
	andcc	n,1,%g0
	be	L(ret2)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract last limb
L(jone):
	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[res_ptr]

L(ret2):
	retl
	addx	%g0,%g0,%o0		C return carry-out from most sign. limb
EPILOGUE(mpn_sub_n)