dnl  Alpha mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).

dnl  Copyright 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:    12.5
C EV5:     6.25
C EV6:     4.375 (i.e., worse than separate mpn_lshift and mpn_add_n at 3.875)

C TODO
C  * Write special version for ev6, as this is a slowdown for 100 < n < 2200
C    compared to separate mpn_lshift and mpn_add_n.
C  * Use addq instead of sll for left shift, and similarly cmplt instead of srl
C    for right shift.
dnl  Incoming arguments.
dnl    rp  pointer to the result limbs
dnl    up  pointer to the limbs used as they are
dnl    vp  pointer to the limbs that get shifted left by one bit
dnl    n   number of limbs to process
define(`rp', `r16')
define(`up', `r17')
define(`vp', `r18')
define(`n',  `r19')

dnl  Source limbs in flight.  Four u/v pairs rotate through the unrolled loop.
define(`u0', `r8')
define(`u1', `r1')
define(`u2', `r2')
define(`u3', `r3')
define(`v0', `r4')
define(`v1', `r5')
define(`v2', `r6')
define(`v3', `r7')

dnl  Carry state.  cy0 and cy1 take turns: each limb stage reads one and
dnl  produces the other.  cy0 is r0, and the last stage before ret writes it.
define(`cy0', `r0')
define(`cy1', `r20')

dnl  Per-limb temporaries.
dnl    vsh   v limb shifted left by one
dnl    psum  u limb combined with vsh
dnl    res   psum combined with the incoming carry, this is what gets stored
dnl    cyt   scratch for a single compare result
define(`vsh',  `r28')
define(`psum', `r25')
define(`res',  `r24')
define(`cyt',  `r22')

dnl  Select the add or the subtract flavour.  CARRY(a,b,c) puts into c the
dnl  unsigned-compare result that flags a wrap of the preceding ADDSUB; the
dnl  operand order of the compare is swapped for the subtract flavour.
ifdef(`OPERATION_addlsh1_n',`
  define(ADDSUB,       addq)
  define(CARRY,       `cmpult $1,$2,$3')
  define(func, mpn_addlsh1_n)
')
ifdef(`OPERATION_sublsh1_n',`
  define(ADDSUB,       subq)
  define(CARRY,       `cmpult $2,$1,$3')
  define(func, mpn_sublsh1_n)
')

MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)

C  rp[] = up[] +- (vp[] << 1) over n limbs.
C
C  Every limb goes through the same nine-instruction stage:
C    vsh  = v << 1
C    psum = u ADDSUB vsh
C    res  = psum ADDSUB carry-in		then res is stored
C    carry-out = (v >> 63) + wrap of first ADDSUB + wrap of second ADDSUB
C  The main loop holds four such stages with the loads for later limbs mixed
C  in.  Code in front of the loop preloads limbs according to n mod 4 and
C  enters at the matching stage; the three stages after the loop finish the
C  limbs already loaded.  The last carry-out is left in cy0.

ASM_START()
PROLOGUE(func)
C  n is biased by -4 so that its sign says whether another full pass of the
C  loop is needed.  The bias does not disturb the low two bits used for the
C  dispatch.  r1 and r2 are only scratch here; they are reloaded as u1 and u2
C  by every path below.
	lda	n, -4(n)
	bis	r31, r31, cy1		C cy1 = 0
	and	n, 3, r1
	beq	r1, $Lrem0
	cmpeq	r1, 1, r2
	bne	r2, $Lrem1
	cmpeq	r1, 2, r2
	bne	r2, $Lrem2

$Lrem3:	C n mod 4 = 3, that is n = 3, 7, 11, ...
	ldq	v0, 0(vp)
	ldq	u0, 0(up)
	ldq	v1, 8(vp)
	ldq	u1, 8(up)
	ldq	v2, 16(vp)
	ldq	u2, 16(up)
	lda	vp, 24(vp)
	lda	up, 24(up)
	bge	n, $Ltop		C at least four more limbs follow
	br	r31, $Ltail3

$Lrem2:	C n mod 4 = 2, that is n = 2, 6, 10, ...
	bis	r31, r31, cy0		C first stage on this path reads cy0
	ldq	v1, 0(vp)
	ldq	u1, 0(up)
	ldq	v2, 8(vp)
	ldq	u2, 8(up)
	lda	rp, -8(rp)		C stage for v1 stores at 8(rp)
	blt	n, $Ltail2		C n = 2, nothing more to load
	ldq	v3, 16(vp)
	ldq	u3, 16(up)
	lda	vp, 48(vp)
	lda	up, 16(up)
	br	r31, $Lin2

$Lrem1:	C n mod 4 = 1, that is n = 1, 5, 9, ...
	ldq	v2, 0(vp)
	ldq	u2, 0(up)
	lda	rp, -16(rp)		C last tail stage stores at 16(rp)
	blt	n, $Ltail1		C n = 1, nothing more to load
	ldq	v3, 8(vp)
	ldq	u3, 8(up)
	ldq	v0, 16(vp)
	ldq	u0, 16(up)
	lda	vp, 40(vp)
	lda	up, 8(up)
	lda	rp, 32(rp)		C stage for v2 in the loop stores at -16(rp)
	br	r31, $Lin1

$Lrem0:	C n mod 4 = 0, that is n = 4, 8, 12, ...
	bis	r31, r31, cy0		C first stage on this path reads cy0
	ldq	v3, 0(vp)
	ldq	u3, 0(up)
	ldq	v0, 8(vp)
	ldq	u0, 8(up)
	ldq	v1, 16(vp)
	ldq	u1, 16(up)
	lda	vp, 32(vp)
	lda	rp, 8(rp)		C stage for v3 stores at -8(rp)
	br	r31, $Lin0

	ALIGN(16)
C  Main loop, four limbs per pass.
C  Stage for u0/v0: carry in cy1, carry out cy0.  Loads u3/v3.
$Ltop:
	sll	v0, 1, vsh		C vsh = v0 << 1
	ldq	v3, 0(vp)
	ADDSUB	u0, vsh, psum		C psum = u0 +- vsh
	ldq	u3, 0(up)
	ADDSUB	psum, cy1, res		C fold in the carry from the limb before
	srl	v0, 63, cy0		C bit shifted out of v0
	CARRY(	psum, u0, cyt)		C wrap of the first ADDSUB
	stq	res, 0(rp)
	addq	cyt, cy0, cy0
	CARRY(	res, psum, cyt)		C wrap of the second ADDSUB
	addq	cyt, cy0, cy0		C cy0 now complete
	lda	vp, 32(vp)		C advance vp for the pass
C  Stage for u1/v1: carry in cy0, carry out cy1.  Loads u0/v0.
$Lin2:
	sll	v1, 1, vsh
	ldq	v0, -24(vp)
	ADDSUB	u1, vsh, psum
	ldq	u0, 8(up)
	ADDSUB	psum, cy0, res
	srl	v1, 63, cy1
	CARRY(	psum, u1, cyt)
	stq	res, 8(rp)
	addq	cyt, cy1, cy1
	CARRY(	res, psum, cyt)
	addq	cyt, cy1, cy1
	lda	rp, 32(rp)		C advance rp for the pass
C  Stage for u2/v2: carry in cy1, carry out cy0.  Loads u1/v1.
$Lin1:
	sll	v2, 1, vsh
	ldq	v1, -16(vp)
	ADDSUB	u2, vsh, psum
	ldq	u1, 16(up)
	ADDSUB	psum, cy1, res
	srl	v2, 63, cy0
	CARRY(	psum, u2, cyt)
	stq	res, -16(rp)
	addq	cyt, cy0, cy0
	CARRY(	res, psum, cyt)
	addq	cyt, cy0, cy0
C  Stage for u3/v3: carry in cy0, carry out cy1.  Loads u2/v2.
$Lin0:
	lda	up, 32(up)		C advance up for the pass
	sll	v3, 1, vsh
	ldq	v2, -8(vp)
	ADDSUB	u3, vsh, psum
	ldq	u2, -8(up)
	ADDSUB	psum, cy0, res
	srl	v3, 63, cy1
	CARRY(	psum, u3, cyt)
	stq	res, -8(rp)
	addq	cyt, cy1, cy1
	CARRY(	res, psum, cyt)
	addq	cyt, cy1, cy1
	lda	n, -4(n)		C four limbs done
	bge	n, $Ltop

C  Wind-down.  The limbs in u0/v0, u1/v1 and u2/v2 are already loaded, so
C  these stages are the loop stages without the loads.
$Ltail3:
	sll	v0, 1, vsh
	ADDSUB	u0, vsh, psum
	ADDSUB	psum, cy1, res
	srl	v0, 63, cy0
	CARRY(	psum, u0, cyt)
	stq	res, 0(rp)
	addq	cyt, cy0, cy0
	CARRY(	res, psum, cyt)
	addq	cyt, cy0, cy0

$Ltail2:
	sll	v1, 1, vsh
	ADDSUB	u1, vsh, psum
	ADDSUB	psum, cy0, res
	srl	v1, 63, cy1
	CARRY(	psum, u1, cyt)
	stq	res, 8(rp)
	addq	cyt, cy1, cy1
	CARRY(	res, psum, cyt)
	addq	cyt, cy1, cy1

$Ltail1:
	sll	v2, 1, vsh
	ADDSUB	u2, vsh, psum
	ADDSUB	psum, cy1, res
	srl	v2, 63, cy0
	CARRY(	psum, u2, cyt)
	stq	res, 16(rp)
	addq	cyt, cy0, cy0
	CARRY(	res, psum, cyt)
	addq	cyt, cy0, cy0		C final carry stays in cy0, which is r0

	ret	r31,(r26),1
EPILOGUE()
ASM_END()