dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
dnl  store difference in a third limb vector.

dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


include(`../config.m4')

C INPUT PARAMETERS
define(res_ptr,%o0)
define(s1_ptr,%o1)
define(s2_ptr,%o2)
define(n,%o3)

C mp_limb_t mpn_sub_n (mp_ptr res_ptr, mp_srcptr s1_ptr, mp_srcptr s2_ptr,
C                      mp_size_t n)
C Computes {res_ptr,n} = {s1_ptr,n} - {s2_ptr,n} and returns the borrow-out
C (0 or 1) from the most significant limb in %o0.
C
C Three code paths are selected on the mutual word alignment of the pointers,
C so that 64-bit ldd/std can be used on whichever pair shares alignment:
C   V1a: s2_ptr and res_ptr aligned alike   (ldd from s2, std to res)
C   V1b: s1_ptr and res_ptr aligned alike   (ldd from s1, std to res)
C   V2:  s1_ptr and s2_ptr aligned alike    (ldd from both, st to res)
C
C Carry-handling idiom used throughout: loop bookkeeping (addcc/cmp) clobbers
C the carry flag, so the borrow is first saved as 0/1 in a register with
C "addx %g0,%g0,%oX" and later restored into the flag with
C "subcc %g0,%oX,%g0" (0 - 1 sets carry), placed in a branch delay slot.
C Note: SPARC "ldd r,[a]" loads a doubleword into an even/odd register PAIR,
C e.g. "ldd [s2_ptr],%g2" fills both %g2 and %g3 -- which is why %g3 and %o5
C appear below without an explicit load of their own.

ASM_START()
PROLOGUE(mpn_sub_n)
	xor	s2_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L(1)			C branch if alignment differs
	nop
C **  V1a  **
	andcc	res_ptr,4,%g0		C res_ptr unaligned?  Side effect: cy=0
	be	L(v1)			C if no, branch
	nop
C Subtract least significant limb separately to align res_ptr and s2_ptr
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	n,-1,n
	subcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L(v1):	addx	%g0,%g0,%o4		C save cy in register
	cmp	n,2			C if n < 2 ...
	bl	L(end2)			C ... branch to tail code
	subcc	%g0,%o4,%g0		C restore cy (delay slot)

	ld	[s1_ptr+0],%g4		C pre-load first two s1 limbs
	addcc	n,-10,n			C n -= 10: 8 for loop chunk + 2 pre-loaded
	ld	[s1_ptr+4],%g1
	ldd	[s2_ptr+0],%g2		C pair load: fills %g2 AND %g3
	blt	L(fin1)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 8 limbs until less than 8 limbs remain
L(loop1):
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]		C pair store: writes %o4 AND %o5
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+16],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+20],%g1
	ldd	[s2_ptr+16],%g2
	std	%o4,[res_ptr+8]
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+24],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+28],%g1
	ldd	[s2_ptr+24],%g2
	std	%o4,[res_ptr+16]
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+32],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+36],%g1
	ldd	[s2_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop1)
	subcc	%g0,%o4,%g0		C restore cy (delay slot)

L(fin1):
	addcc	n,8-2,n			C undo loop bias; test for >= 2 left
	blt	L(end1)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 2 limbs until less than 2 limbs remain
L(loope1):
	subxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	subxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope1)
	subcc	%g0,%o4,%g0		C restore cy (delay slot)
L(end1):
	subxcc	%g4,%g2,%o4		C retire the two limbs still in flight
	subxcc	%g1,%g3,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register

	andcc	n,1,%g0			C odd limb left over?
	be	L(ret1)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract last limb
	ld	[s1_ptr+8],%g4
	ld	[s2_ptr+8],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[res_ptr+8]

L(ret1):
	retl
	addx	%g0,%g0,%o0	C return carry-out from most sign. limb

L(1):	xor	s1_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L(2)
	nop
C **  V1b  **
C Mirror image of V1a with the roles of s1_ptr and s2_ptr swapped; operand
C order in the subxcc's is swapped back so the difference stays s1 - s2.
	andcc	res_ptr,4,%g0		C res_ptr unaligned?  Side effect: cy=0
	be	L(v1b)			C if no, branch
	nop
C Subtract least significant limb separately to align res_ptr and s1_ptr
	ld	[s2_ptr],%g4
	add	s2_ptr,4,s2_ptr
	ld	[s1_ptr],%g2
	add	s1_ptr,4,s1_ptr
	add	n,-1,n
	subcc	%g2,%g4,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L(v1b):	addx	%g0,%g0,%o4		C save cy in register
	cmp	n,2			C if n < 2 ...
	bl	L(end2)			C ... branch to tail code
	subcc	%g0,%o4,%g0		C restore cy (delay slot)

	ld	[s2_ptr+0],%g4		C pre-load first two s2 limbs
	addcc	n,-10,n			C n -= 10: 8 for loop chunk + 2 pre-loaded
	ld	[s2_ptr+4],%g1
	ldd	[s1_ptr+0],%g2		C pair load: fills %g2 AND %g3
	blt	L(fin1b)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 8 limbs until less than 8 limbs remain
L(loop1b):
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+12],%g1
	ldd	[s1_ptr+8],%g2
	std	%o4,[res_ptr+0]
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+16],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+20],%g1
	ldd	[s1_ptr+16],%g2
	std	%o4,[res_ptr+8]
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+24],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+28],%g1
	ldd	[s1_ptr+24],%g2
	std	%o4,[res_ptr+16]
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+32],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+36],%g1
	ldd	[s1_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop1b)
	subcc	%g0,%o4,%g0		C restore cy (delay slot)

L(fin1b):
	addcc	n,8-2,n			C undo loop bias; test for >= 2 left
	blt	L(end1b)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 2 limbs until less than 2 limbs remain
L(loope1b):
	subxcc	%g2,%g4,%o4
	ld	[s2_ptr+8],%g4
	subxcc	%g3,%g1,%o5
	ld	[s2_ptr+12],%g1
	ldd	[s1_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope1b)
	subcc	%g0,%o4,%g0		C restore cy (delay slot)
L(end1b):
	subxcc	%g2,%g4,%o4		C retire the two limbs still in flight
	subxcc	%g3,%g1,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register

	andcc	n,1,%g0			C odd limb left over?
	be	L(ret1b)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract last limb
	ld	[s2_ptr+8],%g4
	ld	[s1_ptr+8],%g2
	subxcc	%g2,%g4,%o4
	st	%o4,[res_ptr+8]

L(ret1b):
	retl
	addx	%g0,%g0,%o0	C return carry-out from most sign. limb

C **  V2  **
C If we come here, the alignment of s1_ptr and res_ptr as well as the
C alignment of s2_ptr and res_ptr differ.  Since there are only two ways
C things can be aligned (that we care about) we now know that the alignment
C of s1_ptr and s2_ptr are the same.

L(2):	cmp	n,1
	be	L(jone)			C single-limb case: skip straight to tail
	nop
	andcc	s1_ptr,4,%g0		C s1_ptr unaligned?  Side effect: cy=0
	be	L(v2)			C if no, branch
	nop
C Subtract least significant limb separately to align s1_ptr and s2_ptr
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	n,-1,n
	subcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

L(v2):	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	blt	L(fin2)
	subcc	%g0,%o4,%g0		C restore cy (delay slot)
C Subtract blocks of 8 limbs until less than 8 limbs remain
L(loop2):
	ldd	[s1_ptr+0],%g2		C pair loads: %g2/%g3 and %o4/%o5
	ldd	[s2_ptr+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]		C res_ptr may be misaligned for std: use st
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	ldd	[s1_ptr+8],%g2
	ldd	[s2_ptr+8],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+8]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+12]
	ldd	[s1_ptr+16],%g2
	ldd	[s2_ptr+16],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+16]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+20]
	ldd	[s1_ptr+24],%g2
	ldd	[s2_ptr+24],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+24]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+28]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop2)
	subcc	%g0,%o4,%g0		C restore cy (delay slot)

L(fin2):
	addcc	n,8-2,n			C undo loop bias; test for >= 2 left
	blt	L(end2)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract blocks of 2 limbs until less than 2 limbs remain
L(loope2):
	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	subxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	subxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope2)
	subcc	%g0,%o4,%g0		C restore cy (delay slot)
L(end2):
	andcc	n,1,%g0			C odd limb left over?
	be	L(ret2)
	subcc	%g0,%o4,%g0		C restore cy
C Subtract last limb
L(jone):
	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	subxcc	%g4,%g2,%o4
	st	%o4,[res_ptr]

L(ret2):
	retl
	addx	%g0,%g0,%o0	C return carry-out from most sign. limb
EPILOGUE(mpn_sub_n)