1dnl SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and 2dnl store difference in a third limb vector. 3 4dnl Copyright 2001 Free Software Foundation, Inc. 5 6dnl This file is part of the GNU MP Library. 7 8dnl The GNU MP Library is free software; you can redistribute it and/or modify 9dnl it under the terms of the GNU Lesser General Public License as published 10dnl by the Free Software Foundation; either version 3 of the License, or (at 11dnl your option) any later version. 12 13dnl The GNU MP Library is distributed in the hope that it will be useful, but 14dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16dnl License for more details. 17 18dnl You should have received a copy of the GNU Lesser General Public License 19dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 20 21 22include(`../config.m4') 23 24C INPUT PARAMETERS 25define(rp,%o0) 26define(s1p,%o1) 27define(s2p,%o2) 28define(n,%o3) 29define(cy,%g1) 30 31C This code uses 64-bit operations on `o' and `g' registers. It doesn't 32C require that `o' registers' upper 32 bits are preserved by the operating 33C system, but if they are not, they must be zeroed. That is indeed what 34C happens at least on Slowaris 2.5 and 2.6. 35 36C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at 37C about 10 cycles/limb from the Ecache. 
C mpn_sub_n(rp, s1p, s2p, n) -- {rp,n} = {s1p,n} - {s2p,n}, for n > 0.
C
C Limbs are 32 bits wide in this routine: they are read with lduw (which
C zero-extends into the 64-bit register), written with stw, and every pointer
C advances 4 bytes per limb.
C
C In:	rp  = %o0	destination vector
C	s1p = %o1	minuend vector
C	s2p = %o2	subtrahend vector
C	n   = %o3	number of limbs, must be > 0
C Out:	%o0		borrow out of the most significant limb (0 or 1)
C Uses:	%o4 %o5		limb pair at offset 0 from s1p/s2p
C	%g2 %g3		limb pair at offset 4 from s1p/s2p
C	%g4		64-bit difference being formed
C	cy = %g1	borrow carried between limbs
C	%f2		overwritten by the fitod fillers in the loop
C	%icc		integer condition codes
C
C Borrow handling: two zero-extended limbs are subtracted in a 64-bit register
C and the incoming borrow is subtracted from that.  The 64-bit result is
C negative exactly when the limb subtraction borrows, so a logical right shift
C by 63 yields the outgoing borrow, while stw stores the low 32 bits as the
C result limb.
C
C None of the branches below is annulled, so the instruction following each
C branch (its delay slot) executes whether or not the branch is taken.

ASM_START()
PROLOGUE(mpn_sub_n)
	lduw	[s1p+0],%o4
	addcc	n,-2,n
	bl,pn	%icc,L(end1)
C Delay slot, executed for n = 1 as well: only limb 0 may be touched here.
C The limb 1 loads are issued after the branch, once n >= 2 is known, so that
C nothing beyond the end of a one-limb operand is ever read.
	lduw	[s2p+0],%o5
	lduw	[s1p+4],%g2
	lduw	[s2p+4],%g3
C The loads above leave %icc alone, so this still tests the addcc result.
	be,pn	%icc,L(end2)
C Delay slot: no borrow into the least significant limb.
	mov	0,cy

C Main loop, two limbs per iteration.  On entry to L(loop):
C   %o4,%o5 = limbs at [s1p+0],[s2p+0]    %g2,%g3 = limbs at [s1p+4],[s2p+4]
C   rp      = where the %o4-%o5 result goes
C   cy      = borrow in
C   n       = limbs not yet loaded, > 0
C The C --- lines separate groups of up to four instructions; the loop has six
C such groups per two limbs.  The fitod instructions compute nothing that is
C used afterwards; they appear to be fillers for instruction grouping on
C UltraSPARC (not verifiable from this file).
	.align	16
L(loop):
	sub	%o4,%o5,%g4
	add	rp,8,rp
	lduw	[s1p+8],%o4
	fitod	%f0,%f2
C ---
	sub	%g4,cy,%g4
	addcc	n,-1,n
	lduw	[s2p+8],%o5
	fitod	%f0,%f2
C ---
	srlx	%g4,63,cy
	add	s2p,8,s2p
	stw	%g4,[rp-8]
C Odd limb count: the limb just loaded into %o4,%o5 is the last one.  The
C target is L(exito)+4 because the delay slot below already performs the
C first instruction of L(exito).
	be,pn	%icc,L(exito)+4
C ---
	sub	%g2,%g3,%g4
	addcc	n,-1,n
	lduw	[s1p+12],%g2
	fitod	%f0,%f2
C ---
	sub	%g4,cy,%g4
	add	s1p,8,s1p
C s2p has already been advanced by 8, so +4 here matches [s1p+12] above.
	lduw	[s2p+4],%g3
	fitod	%f0,%f2
C ---
	srlx	%g4,63,cy
	bne,pt	%icc,L(loop)
C Delay slot: store the second limb of this iteration.
	stw	%g4,[rp-4]
C ---
C Even limb count: two loaded limb pairs remain, %o4,%o5 then %g2,%g3.
L(exite):
	sub	%o4,%o5,%g4
	sub	%g4,cy,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp+0]
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+4]
	retl
C Delay slot: return the final borrow.
	srlx	%g4,63,%o0

C Odd limb count, entered at L(exito)+4 from the loop.  rp has already been
C advanced, so the pending %g2,%g3 limb is stored at [rp-4] and the final
C %o4,%o5 limb at [rp+0].
L(exito):
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp-4]
	sub	%o4,%o5,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+0]
	retl
C Delay slot: return the final borrow.
	srlx	%g4,63,%o0

C n = 1: a single limb, no borrow in.
L(end1):
	sub	%o4,%o5,%g4
	stw	%g4,[rp+0]
	retl
C Delay slot: return the borrow.
	srlx	%g4,63,%o0

C n = 2: both limb pairs are already loaded, no borrow in.
L(end2):
	sub	%o4,%o5,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp+0]
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+4]
	retl
C Delay slot: return the final borrow.
	srlx	%g4,63,%o0
EPILOGUE(mpn_sub_n)