dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
dnl  store sum in a third limb vector.

dnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     4.75
C EV6:     3

dnl  INPUT PARAMETERS
dnl  res_ptr    r16
dnl  s1_ptr     r17
dnl  s2_ptr     r18
dnl  size       r19

dnl  RESULT
dnl  r0         carry out of the most significant limb (0 or 1)

dnl  REGISTER ROLES
dnl  r0-r3      limbs loaded through s2_ptr
dnl  r4-r7      limbs loaded through s1_ptr
dnl  r20-r23    finished result limbs waiting to be stored
dnl  r28        raw sum of one s2 limb and one s1 limb, before carry-in
dnl  r8         carry out of the raw sum
dnl  r25        running carry; also receives the carry out of the carry-in add
dnl  r19        limb counter, kept biased as described at each loop

dnl  CARRY SCHEME
dnl  Each limb needs two adds (raw sum, then carry-in), and each add is
dnl  followed by a cmpult that recovers its carry.  The two carries are merged
dnl  with bis: if the raw sum wrapped it is at most 2^64-2, so the carry-in
dnl  add cannot wrap as well.

ASM_START()
PROLOGUE(mpn_add_n)
        bis     r31,r31,r25     C running carry = 0
        subq    r19,4,r19       C r19 = size - 4
        blt     r19,$Ltail      C under 4 limbs: skip the unrolled loop

C Prime the 4-limb pipeline.  Limb 0 is finished here (no carry-in yet, so a
C single add is enough); limb 1 gets both adds but only the first of its two
C carry tests.  Limbs 2 and 3 are only loaded.
        ldq     r0,0(r18)
        ldq     r4,0(r17)
        ldq     r1,8(r18)
        ldq     r5,8(r17)
        addq    r17,32,r17      C s1_ptr += 4 limbs
        ldq     r2,16(r18)
        addq    r0,r4,r20       C limb 0: sum
        ldq     r3,24(r18)
        subq    r19,4,r19       C r19 = limbs left after this group, minus 4
        ldq     r6,-16(r17)     C s1 limb 2 (pointer already advanced)
        cmpult  r20,r0,r25      C limb 0: carry out
        ldq     r7,-8(r17)      C s1 limb 3
        addq    r1,r5,r28       C limb 1: raw sum
        addq    r18,32,r18      C s2_ptr += 4 limbs
        addq    r28,r25,r21     C limb 1: add carry-in
        cmpult  r28,r5,r8       C limb 1: carry from raw sum
        blt     r19,$Ldrain     C no further full group: just drain

C Unrolled loop, one group of 4 limbs per pass.
C On entry (and at $Ldrain): r20/r21 hold finished limbs 0/1 of the current
C group, r28/r8 hold limb 1 raw sum and its carry, r2/r3/r6/r7 hold the
C loaded limbs 2/3, and res_ptr still points at limb 0 of the group.
C Each pass finishes limbs 2/3 of the current group, stores all four, and
C starts limbs 0/1 of the next group.
        ALIGN(16)
$Lunroll:
        cmpult  r21,r28,r25     C limb 1: carry from carry-in add
        ldq     r0,0(r18)       C next group: s2 limb 0
        bis     r8,r25,r25      C limb 1: merged carry out
        ldq     r1,8(r18)       C next group: s2 limb 1
        addq    r2,r6,r28       C limb 2: raw sum
        ldq     r4,0(r17)       C next group: s1 limb 0
        addq    r28,r25,r22     C limb 2: add carry-in
        ldq     r5,8(r17)       C next group: s1 limb 1
        cmpult  r28,r6,r8       C limb 2: carry from raw sum
        cmpult  r22,r28,r25     C limb 2: carry from carry-in add
        stq     r20,0(r16)      C store limb 0
        bis     r8,r25,r25      C limb 2: merged carry out
        stq     r21,8(r16)      C store limb 1
        addq    r3,r7,r28       C limb 3: raw sum
        addq    r28,r25,r23     C limb 3: add carry-in
        cmpult  r28,r7,r8       C limb 3: carry from raw sum
        cmpult  r23,r28,r25     C limb 3: carry from carry-in add
        addq    r17,32,r17      C s1_ptr += 4 limbs
        bis     r8,r25,r25      C limb 3: merged carry out
        addq    r16,32,r16      C res_ptr += 4 limbs
        addq    r0,r4,r28       C next limb 0: raw sum
        ldq     r2,16(r18)      C next group: s2 limb 2
        addq    r25,r28,r20     C next limb 0: add carry-in
        ldq     r3,24(r18)      C next group: s2 limb 3
        cmpult  r28,r4,r8       C next limb 0: carry from raw sum
        ldq     r6,-16(r17)     C next group: s1 limb 2
        cmpult  r20,r28,r25     C next limb 0: carry from carry-in add
        ldq     r7,-8(r17)      C next group: s1 limb 3
        bis     r8,r25,r25      C next limb 0: merged carry out
        subq    r19,4,r19       C one more group is now in flight
        stq     r22,-16(r16)    C store limb 2 (res_ptr already advanced)
        addq    r1,r5,r28       C next limb 1: raw sum
        stq     r23,-8(r16)     C store limb 3
        addq    r25,r28,r21     C next limb 1: add carry-in
        addq    r18,32,r18      C s2_ptr += 4 limbs
        cmpult  r28,r5,r8       C next limb 1: carry from raw sum
        bge     r19,$Lunroll    C repeat while another full group follows

C Drain the pipeline: same work as the loop body for the group in flight,
C but without loading or starting a following group.
$Ldrain:
        cmpult  r21,r28,r25     C limb 1: carry from carry-in add
        bis     r8,r25,r25      C limb 1: merged carry out
        addq    r2,r6,r28       C limb 2: raw sum
        addq    r28,r25,r22     C limb 2: add carry-in
        cmpult  r28,r6,r8       C limb 2: carry from raw sum
        cmpult  r22,r28,r25     C limb 2: carry from carry-in add
        stq     r20,0(r16)      C store limb 0
        bis     r8,r25,r25      C limb 2: merged carry out
        stq     r21,8(r16)      C store limb 1
        addq    r3,r7,r28       C limb 3: raw sum
        addq    r28,r25,r23     C limb 3: add carry-in
        cmpult  r28,r7,r8       C limb 3: carry from raw sum
        cmpult  r23,r28,r25     C limb 3: carry from carry-in add
        bis     r8,r25,r25      C limb 3: merged carry out
        addq    r16,32,r16      C res_ptr += 4 limbs
        stq     r22,-16(r16)    C store limb 2
        stq     r23,-8(r16)     C store limb 3

C Leftover limbs.  Undo the bias so that r19 is the number of limbs still
C to be added (0 to 3); r25 carries in from whatever ran before.
$Ltail:
        addq    r19,4,r19       C r19 = limbs remaining
        beq     r19,$Lexit      C none left: return the carry

C Prime the 1-limb pipeline with the first leftover pair.
        ldq     r0,0(r18)
        ldq     r4,0(r17)
        subq    r19,1,r19       C r19 = limbs remaining after this one
        beq     r19,$Llast      C exactly one left: finish it directly

C One limb per pass; the pair for the following pass is loaded while the
C current one is being added, so this loop runs only while a further limb
C exists.
        ALIGN(16)
$Lsingle:
        addq    r0,r4,r28       C raw sum
        ldq     r0,8(r18)       C next s2 limb
        cmpult  r28,r4,r8       C carry from raw sum
        ldq     r4,8(r17)       C next s1 limb
        addq    r28,r25,r20     C add carry-in
        addq    r18,8,r18       C s2_ptr += 1 limb
        addq    r17,8,r17       C s1_ptr += 1 limb
        stq     r20,0(r16)      C store result limb
        cmpult  r20,r28,r25     C carry from carry-in add
        subq    r19,1,r19       C one limb fewer to go
        bis     r8,r25,r25      C merged carry out
        addq    r16,8,r16       C res_ptr += 1 limb
        bne     r19,$Lsingle

C Final limb: its operands are already in r0 and r4.
$Llast:
        addq    r0,r4,r28       C raw sum
        addq    r28,r25,r20     C add carry-in
        cmpult  r28,r4,r8       C carry from raw sum
        cmpult  r20,r28,r25     C carry from carry-in add
        stq     r20,0(r16)      C store result limb
        bis     r8,r25,r25      C merged carry out

$Lexit:
        bis     r25,r31,r0      C return value = final carry
        ret     r31,(r26),1     C return through r26
EPILOGUE(mpn_add_n)
ASM_END()