dnl  Alpha ev6 nails mpn_addmul_2.

dnl  Copyright 2002, 2005, 2006 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C Runs at 4.0 cycles/limb.

C We could either go for 2-way unrolling over 11 cycles, or 2.75 c/l,
C or 4-way unrolling over 20 cycles, for 2.5 c/l.


C  mpn_addmul_2, nails build.
C
C  What the code below does: two limbs are read from vp and shifted left by
C  NAIL_BITS.  Every limb read from up is multiplied by both of them, the
C  partial products are summed together with the limb read from rp, and the
C  numb part of the sum is written back to rp.  n limbs of rp are read, n+1
C  limbs are written, and one further limb is handed back in r0.
C
C  n is only tested after its first decrement, so the code relies on n >= 1.
C
C  The U0/U1/L0/L1 tags in the right hand column are the scheduling
C  annotations of the original author (presumably ev6 issue slots); the
C  instruction order has been left exactly as scheduled.

C  INPUT PARAMETERS
define(`rp',`r16')
define(`up',`r17')
define(`n',`r18')
define(`vp',`r19')

C  Constant mask and the limbs currently in flight
define(`nmask',`r24')
define(`uword',`r25')
define(`rword',`r27')

C  Products: pNlo/pNhi are mulq/umulh of the current up limb by v limb N
define(`p0lo',`r0')
define(`p0hi',`r1')
define(`p1lo',`r2')
define(`p1hi',`r3')

C  Running sums carried from one limb position to the next
define(`acc_lo',`r4')
define(`acc_hi',`r5')

C  The two multiplier limbs, kept shifted left by NAIL_BITS
define(`vlimb0',`r6')
define(`vlimb1',`r7')

C  Temporaries.  wsum shares r19 with vp, which is no longer needed once both
C  multiplier limbs have been loaded.
define(`lowp',`r8')
define(`wsum',`r19')
define(`outl',`r28')

define(`NAIL_BITS',`GMP_NAIL_BITS')
define(`NUMB_BITS',`GMP_NUMB_BITS')

C  This declaration is munged by configure
NAILS_SUPPORT(3-63)

ASM_START()
PROLOGUE(mpn_addmul_2)
        lda     nmask,  -1(r31)                 C all ones ...
        srl     nmask,  NAIL_BITS,      nmask   C ... with the nail bits cleared

        ldq     vlimb0, 0(vp)
        ldq     vlimb1, 8(vp)

        bis     r31,    r31,    acc_lo          C acc_lo = 0
        sll     vlimb0, NAIL_BITS,      vlimb0  C pre-shift so mulq/umulh split at the numb boundary
        bis     r31,    r31,    acc_hi          C acc_hi = 0
        sll     vlimb1, NAIL_BITS,      vlimb1
        bis     r31,    r31,    wsum            C no nail carry yet (vp is dead from here on)

C  Start the four products for the first up limb before entering the loop.
        ldq     uword,  0(up)
        lda     up,     8(up)
        mulq    vlimb0, uword,  p0lo            C U1
        umulh   vlimb0, uword,  p0hi            C U1
        mulq    vlimb1, uword,  p1lo            C U1
        umulh   vlimb1, uword,  p1hi            C U1
        lda     n,      -1(n)
        beq     n,      L(end)                  C U0  single limb: go straight to the wind-down

C  Main loop.  Each pass finishes the limb whose products are already in
C  p0lo/p0hi/p1lo/p1hi and starts the products for the next up limb.
        ALIGN(16)
L(top): bis     r31,    r31,    r31             C U1  nop
        addq    wsum,   acc_lo, acc_lo          C U0  add in nail part of previous sum
        ldq     rword,  0(rp)                   C L0
        ldq     uword,  0(up)                   C L1

        lda     rp,     8(rp)                   C L1
        srl     p0lo,   NAIL_BITS,      lowp    C U0  undo the pre-shift on the low product
        lda     up,     8(up)                   C L0
        mulq    vlimb0, uword,  p0lo            C U1

        addq    lowp,   acc_lo, wsum            C U0
        addq    p0hi,   acc_hi, acc_lo          C L1
        umulh   vlimb0, uword,  p0hi            C U1
        bis     r31,    r31,    r31             C L0  nop

        addq    rword,  wsum,   wsum            C L1  complete sum for this limb
        srl     p1lo,   NAIL_BITS,      lowp    C U0
        lda     n,      -1(n)                   C L0
        mulq    vlimb1, uword,  p1lo            C U1

        addq    lowp,   acc_lo, acc_lo          C U0
        bis     r31,    p1hi,   acc_hi          C L1  acc_hi = p1hi
        umulh   vlimb1, uword,  p1hi            C U1
        and     wsum,   nmask,  outl            C L0  numb part goes to memory

        unop
        srl     wsum,   NUMB_BITS,      wsum    C U1  nail part is carried onwards
        stq     outl,   -8(rp)                  C L1  rp was already advanced
        bne     n,      L(top)                  C U0

C  Wind-down: same arithmetic as one loop pass, without fetching a new up limb
C  or starting new products.
L(end): ldq     rword,  0(rp)
        addq    wsum,   acc_lo, acc_lo          C add in nail part of previous sum
        lda     rp,     8(rp)
        srl     p0lo,   NAIL_BITS,      lowp    C U0
        addq    lowp,   acc_lo, wsum
        addq    p0hi,   acc_hi, acc_lo
        addq    rword,  wsum,   wsum
        srl     p1lo,   NAIL_BITS,      lowp    C U0
        addq    lowp,   acc_lo, acc_lo
        bis     r31,    p1hi,   acc_hi
        and     wsum,   nmask,  outl            C numb part of the last summed limb

        srl     wsum,   NUMB_BITS,      wsum    C its nail part
        stq     outl,   -8(rp)

C  One extra limb is stored past the n summed limbs; whatever remains is returned.
        addq    wsum,   acc_lo, acc_lo          C add in nail part
        and     acc_lo, nmask,  outl
        stq     outl,   0(rp)
        srl     acc_lo, NUMB_BITS,      wsum
        addq    wsum,   acc_hi, r0              C return value

        ret     r31,    (r26),  1
EPILOGUE()
ASM_END()