dnl  SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
dnl  sum in a third limb vector.

dnl  Copyright 2001 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


include(`../config.m4')

C INPUT PARAMETERS
C   rp   destination limb vector; receives the sum
C   s1p  first source limb vector
C   s2p  second source limb vector
C   n    number of limbs in each vector, must be > 0
define(rp,%o0)
define(s1p,%o1)
define(s2p,%o2)
define(n,%o3)
C cy is not an argument.  It names the scratch register that carries the
C running carry from one limb to the next; the code sets it to 0 itself.
define(cy,%g1)

C This code uses 64-bit operations on `o' and `g' registers.  It doesn't
C require that `o' registers' upper 32 bits are preserved by the operating
C system, but if they are not, they must be zeroed.  That is indeed what
C happens at least on Solaris 2.5 and 2.6.

C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
C about 10 cycles/limb from the Ecache.
ASM_START()
C mpn_add_n -- rp[0..n-1] = s1p[0..n-1] + s2p[0..n-1], carry out returned.
C
C Limbs are 32 bits wide (lduw/stw, 4-byte stride).  Each sum is formed in a
C 64-bit register from zero-extended operands, so the carry out of a limb is
C bit 32 of the sum and is extracted with srlx.
C
C In:	rp	destination vector
C	s1p	first source vector
C	s2p	second source vector
C	n	number of limbs, must be > 0
C Out:	%o0	carry out of the most significant limb (0 or 1)
C Uses:	%o4,%o5	operands of the even-numbered limb in flight
C	%g2,%g3	operands of the odd-numbered limb in flight
C	%g4	64-bit sum of the limb being stored
C	cy	carry into the next limb
C	%f2	written by the fitod instructions (its value is never read here)
C	%icc	condition codes
C
C The main loop handles two limbs per iteration and is software pipelined:
C the operands for the following limbs are loaded while the current sums are
C formed.  The `C ---' lines appear to mark the intended issue groups (six per
C two limbs); the fitod instructions are presumably fillers for that schedule.
C
C No load touches memory beyond limb n-1 of either source.  In particular the
C second limb pair is fetched only after the n == 1 case has branched away,
C and the delay slot of that branch holds an instruction that does not access
C memory.
PROLOGUE(mpn_add_n)
	lduw	[s1p+0],%o4		C limb 0 operands; valid because n > 0
	lduw	[s2p+0],%o5
	addcc	n,-2,n			C negative: n was 1, zero: n was 2
	bl,pn	%icc,L(end1)
	mov	0,cy			C delay slot: carry in is 0, no memory access
	lduw	[s1p+4],%g2		C limb 1 operands; only reached when n >= 2
	be,pn	%icc,L(end2)		C %icc still holds the addcc result
	lduw	[s2p+4],%g3		C delay slot: wanted by both end2 and the loop

	.align	16
C Loop entry: %o4,%o5 and %g2,%g3 hold the next two limb pairs, cy the carry
C into the first of them, and n > 0 counts the limbs beyond those two.
L(loop):
	add	%o4,%o5,%g4
	add	rp,8,rp
	lduw	[s1p+8],%o4
	fitod	%f0,%f2
C ---
	add	cy,%g4,%g4
	addcc	n,-1,n			C zero: the limb just being loaded is the last
	lduw	[s2p+8],%o5
	fitod	%f0,%f2
C ---
	srlx	%g4,32,cy
	add	s2p,8,s2p
	stw	%g4,[rp-8]
	be,pn	%icc,L(exito)+4		C +4: the delay slot below already did
					C the first instruction of exito
C ---
	add	%g2,%g3,%g4
	addcc	n,-1,n			C zero: exactly two limbs remain, fall to exite
	lduw	[s1p+12],%g2
	fitod	%f0,%f2
C ---
	add	cy,%g4,%g4
	add	s1p,8,s1p
	lduw	[s2p+4],%g3		C s2p was already advanced by 8 above
	fitod	%f0,%f2
C ---
	srlx	%g4,32,cy
	bne,pt	%icc,L(loop)
	stw	%g4,[rp-4]		C delay slot: store the odd limb
C ---
C Even exit: two loaded pairs remain, %o4,%o5 for rp+0 then %g2,%g3 for rp+4.
L(exite):
	add	%o4,%o5,%g4
	add	cy,%g4,%g4
	srlx	%g4,32,cy
	stw	%g4,[rp+0]
	add	%g2,%g3,%g4
	add	cy,%g4,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,32,%o0		C delay slot: return the carry out

C Odd exit: %g2,%g3 is the pending odd limb for rp-4, %o4,%o5 the final limb
C for rp+0.  The loop enters at L(exito)+4, after the first add.
L(exito):
	add	%g2,%g3,%g4
	add	cy,%g4,%g4
	srlx	%g4,32,cy
	stw	%g4,[rp-4]
	add	%o4,%o5,%g4
	add	cy,%g4,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,32,%o0		C delay slot: return the carry out

C n == 1: a single limb, no carry in.
L(end1):
	add	%o4,%o5,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,32,%o0		C delay slot: return the carry out

C n == 2: two limbs, both pairs already loaded.
L(end2):
	add	%o4,%o5,%g4
	srlx	%g4,32,cy
	stw	%g4,[rp+0]
	add	%g2,%g3,%g4
	add	cy,%g4,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,32,%o0		C delay slot: return the carry out
EPILOGUE(mpn_add_n)