dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
dnl  sum in a third limb vector.

dnl  Copyright 1995, 1996, 2000 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


include(`../config.m4')

C INPUT PARAMETERS
define(res_ptr,%o0)
define(s1_ptr,%o1)
define(s2_ptr,%o2)
define(n,%o3)

ASM_START()
PROLOGUE(mpn_add_n)
	xor	s2_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L(1)			C branch if alignment differs
	nop
C **  V1a  **
L(0):	andcc	res_ptr,4,%g0		C res_ptr unaligned? Side effect: cy=0
	be	L(v1)			C if no, branch
	nop
C Add least significant limb separately to align res_ptr and s2_ptr
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	n,-1,n
	addcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr
L(v1):	addx	%g0,%g0,%o4		C save cy in register
	cmp	n,2			C if n < 2 ...
	bl	L(end2)			C ... branch to tail code
	subcc	%g0,%o4,%g0		C restore cy

	ld	[s1_ptr+0],%g4
	addcc	n,-10,n
	ld	[s1_ptr+4],%g1
	ldd	[s2_ptr+0],%g2
	blt	L(fin1)
	subcc	%g0,%o4,%g0		C restore cy
C Add blocks of 8 limbs until less than 8 limbs remain
L(loop1):
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+16],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+20],%g1
	ldd	[s2_ptr+16],%g2
	std	%o4,[res_ptr+8]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+24],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+28],%g1
	ldd	[s2_ptr+24],%g2
	std	%o4,[res_ptr+16]
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+32],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+36],%g1
	ldd	[s2_ptr+32],%g2
	std	%o4,[res_ptr+24]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop1)
	subcc	%g0,%o4,%g0		C restore cy

L(fin1):
	addcc	n,8-2,n
	blt	L(end1)
	subcc	%g0,%o4,%g0		C restore cy
C Add blocks of 2 limbs until less than 2 limbs remain
L(loope1):
	addxcc	%g4,%g2,%o4
	ld	[s1_ptr+8],%g4
	addxcc	%g1,%g3,%o5
	ld	[s1_ptr+12],%g1
	ldd	[s2_ptr+8],%g2
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope1)
	subcc	%g0,%o4,%g0		C restore cy
L(end1):
	addxcc	%g4,%g2,%o4
	addxcc	%g1,%g3,%o5
	std	%o4,[res_ptr+0]
	addx	%g0,%g0,%o4		C save cy in register

	andcc	n,1,%g0
	be	L(ret1)
	subcc	%g0,%o4,%g0		C restore cy
C Add last limb
	ld	[s1_ptr+8],%g4
	ld	[s2_ptr+8],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr+8]

L(ret1):
	retl
	addx	%g0,%g0,%o0		C return carry-out from most sign. limb

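C s2_ptr and res_ptr are not mutually aligned.  If s1_ptr and res_ptr
C are mutually aligned, swap the two source pointers (addition is
C commutative) and reuse the V1 code above; otherwise both sources
C differ from res_ptr in alignment and the V2 code below applies.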
L(1):	xor	s1_ptr,res_ptr,%g1
	andcc	%g1,4,%g0
	bne	L(2)
	nop
C **  V1b  **
	mov	s2_ptr,%g1
	mov	s1_ptr,s2_ptr
	b	L(0)
	mov	%g1,s1_ptr

C **  V2  **
C If we come here, the alignment of s1_ptr and res_ptr as well as the
C alignment of s2_ptr and res_ptr differ.  Since there are only two ways
C things can be aligned (that we care about) we now know that the
C alignment of s1_ptr and s2_ptr is the same.

L(2):	cmp	n,1
	be	L(jone)
	nop
	andcc	s1_ptr,4,%g0		C s1_ptr unaligned? Side effect: cy=0
	be	L(v2)			C if no, branch
	nop
C Add least significant limb separately to align s1_ptr and s2_ptr
	ld	[s1_ptr],%g4
	add	s1_ptr,4,s1_ptr
	ld	[s2_ptr],%g2
	add	s2_ptr,4,s2_ptr
	add	n,-1,n
	addcc	%g4,%g2,%o4
	st	%o4,[res_ptr]
	add	res_ptr,4,res_ptr

L(v2):	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	blt	L(fin2)
	subcc	%g0,%o4,%g0		C restore cy
C Add blocks of 8 limbs until less than 8 limbs remain
L(loop2):
	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	ldd	[s1_ptr+8],%g2
	ldd	[s2_ptr+8],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+8]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+12]
	ldd	[s1_ptr+16],%g2
	ldd	[s2_ptr+16],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+16]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+20]
	ldd	[s1_ptr+24],%g2
	ldd	[s2_ptr+24],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+24]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+28]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-8,n
	add	s1_ptr,32,s1_ptr
	add	s2_ptr,32,s2_ptr
	add	res_ptr,32,res_ptr
	bge	L(loop2)
	subcc	%g0,%o4,%g0		C restore cy

L(fin2):
	addcc	n,8-2,n
	blt	L(end2)
	subcc	%g0,%o4,%g0		C restore cy
L(loope2):
	ldd	[s1_ptr+0],%g2
	ldd	[s2_ptr+0],%o4
	addxcc	%g2,%o4,%g2
	st	%g2,[res_ptr+0]
	addxcc	%g3,%o5,%g3
	st	%g3,[res_ptr+4]
	addx	%g0,%g0,%o4		C save cy in register
	addcc	n,-2,n
	add	s1_ptr,8,s1_ptr
	add	s2_ptr,8,s2_ptr
	add	res_ptr,8,res_ptr
	bge	L(loope2)
	subcc	%g0,%o4,%g0		C restore cy
L(end2):
	andcc	n,1,%g0
	be	L(ret2)
	subcc	%g0,%o4,%g0		C restore cy
C Add last limb
L(jone):
	ld	[s1_ptr],%g4
	ld	[s2_ptr],%g2
	addxcc	%g4,%g2,%o4
	st	%o4,[res_ptr]

L(ret2):
	retl
	addx	%g0,%g0,%o0		C return carry-out from most sign. limb
EPILOGUE(mpn_add_n)