dnl  aors_n.asm revision 1.1.1.1

dnl  PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011 Free Software
dnl  Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630        1.5
C POWER4/PPC970        2
C POWER5               2
C POWER6               2.78
C POWER7               2.15-2.87

C This code is based on powerpc64/aors_n.asm.

C INPUT PARAMETERS
C rp	r3
C up	r4
C vp	r5
C n	r6
C carry-in	r7	(read by the _nc entry points only)

C OVERVIEW
C For i = 0 .. n-1 the routine stores up[i] + vp[i] (add build) or
C up[i] - vp[i] (sub build) into rp[i], chaining the carry or borrow through
C the CA bit, and returns the final carry or borrow as 0 or 1 in r3.
C
C The n & 3 leading limbs are handled by one of three straight-line blocks,
C the rest by a loop that processes four limbs per iteration.  No instruction
C between the carry-chained operations modifies CA, so the chain stays intact
C across loads, stores, pointer updates and branches.
C
C NOTE(review): n = 0 is not handled.  The count register would be loaded
C with 0 and the loop would not stop after zero limbs.  Callers are presumably
C required to pass n >= 1 -- confirm against the mpn interface contract.

C OPERATION MACROS
C ADDSUBC  add or subtract using CA as carry-in and producing CA as carry-out.
C          Called as ADDSUBC rt, v, u it yields u + v (add) or u - v (sub).
C ADDSUB   the same without carry-in; defined but not referenced in the code
C          visible in this file.
C SETCBR   set CA from the carry-in register passed as argument, clobbering r0.
C          add: CA = 1 iff the register is nonzero.
C          sub: CA = 1 iff the register is zero, meaning no borrow.
C CLRCB    set CA to the no-carry state, clobbering r0.
C          add: r0 + 0 gives CA = 0.
C          sub: r1 + (-1) gives CA = 1 for any nonzero r1; r1 is presumably
C          the stack pointer and therefore never zero -- confirm for the ABI.
C GENRVAL  convert r3 = CA - 1 into the 0/1 return value.

ifdef(`OPERATION_add_n',`
	define(ADDSUBC,	adde)
	define(ADDSUB,	addc)
	define(func,	mpn_add_n)
	define(func_nc,	mpn_add_nc)
	define(GENRVAL,	`addi	r3, r3, 1')
	define(SETCBR,	`addic	r0, $1, -1')
	define(CLRCB,	`addic	r0, r0, 0')
')
ifdef(`OPERATION_sub_n',`
	define(ADDSUBC,	subfe)
	define(ADDSUB,	subfc)
	define(func,	mpn_sub_n)
	define(func_nc,	mpn_sub_nc)
	define(GENRVAL,	`neg	r3, r3')
	define(SETCBR,	`subfic	r0, $1, 0')
	define(CLRCB,	`addic	r0, r1, -1')
')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ASM_START()

C Entry point taking a carry-in in r7.  It only initialises CA and then
C joins the shared body at the ent label inside the plain entry point below.
PROLOGUE(func_nc)
	SETCBR(r7)
	b	L(ent)
EPILOGUE()

C Entry point without carry-in, followed by the body shared by both entries.
PROLOGUE(func)
	CLRCB
C Save r28-r31, which are used as result registers, in the four words just
C below r1.  r1 itself is not adjusted.
C NOTE(review): this relies on nothing overwriting the words below r1 while
C the routine runs -- confirm that the target ABI permits it.
L(ent):	stw	r31, -4(r1)
	stw	r30, -8(r1)
	stw	r29, -12(r1)
	stw	r28, -16(r1)

	rlwinm.	r0, r6, 0,30,31	C r0 = n & 3, set cr0
	cmpwi	cr6, r0, 2	C cr6 = (n & 3) compared with 2
	addi	r6, r6, 3	C compute count...
	srwi	r6, r6, 2	C ...for ctr: (n + 3) / 4
	mtctr	r6		C copy count into ctr
	beq	cr0, L(b00)	C n & 3 = 0
	blt	cr6, L(b01)	C n & 3 = 1
	beq	cr6, L(b10)	C n & 3 = 2
				C n & 3 = 3 falls through

C Peel three limbs.
L(b11):	lwz	r8, 0(r4)	C load s1 limb
	lwz	r9, 0(r5)	C load s2 limb
	lwz	r10, 4(r4)	C load s1 limb
	lwz	r11, 4(r5)	C load s2 limb
	lwz	r12, 8(r4)	C load s1 limb
	addi	r4, r4, 12
	lwz	r0, 8(r5)	C load s2 limb
	addi	r5, r5, 12
	ADDSUBC	r29, r9, r8
	ADDSUBC	r30, r11, r10
	ADDSUBC	r31, r0, r12
	stw	r29, 0(r3)
	stw	r30, 4(r3)
	stw	r31, 8(r3)
	addi	r3, r3, 12
	bdnz	L(go)		C groups of four limbs remain
	b	L(ret)		C n was 3

C Peel one limb.
L(b01):	lwz	r12, 0(r4)	C load s1 limb
	addi	r4, r4, 4
	lwz	r0, 0(r5)	C load s2 limb
	addi	r5, r5, 4
	ADDSUBC	r31, r0, r12	C add or subtract with carry
	stw	r31, 0(r3)
	addi	r3, r3, 4
	bdnz	L(go)		C groups of four limbs remain
	b	L(ret)		C n was 1

C Peel two limbs.
L(b10):	lwz	r10, 0(r4)	C load s1 limb
	lwz	r11, 0(r5)	C load s2 limb
	lwz	r12, 4(r4)	C load s1 limb
	addi	r4, r4, 8
	lwz	r0, 4(r5)	C load s2 limb
	addi	r5, r5, 8
	ADDSUBC	r30, r11, r10	C add or subtract with carry
	ADDSUBC	r31, r0, r12	C add or subtract with carry
	stw	r30, 0(r3)
	stw	r31, 4(r3)
	addi	r3, r3, 8
	bdnz	L(go)		C groups of four limbs remain
	b	L(ret)		C n was 2

C n & 3 = 0: nothing to peel, CA already holds the incoming carry state.
L(b00):
C Load the first group of four limb pairs.  The arithmetic on them is done
C either in the loop, overlapped with the loads of the next group, or at the
C end label when this is the last group.
L(go):	lwz	r6, 0(r4)	C load s1 limb
	lwz	r7, 0(r5)	C load s2 limb
	lwz	r8, 4(r4)	C load s1 limb
	lwz	r9, 4(r5)	C load s2 limb
	lwz	r10, 8(r4)	C load s1 limb
	lwz	r11, 8(r5)	C load s2 limb
	lwz	r12, 12(r4)	C load s1 limb
	lwz	r0, 12(r5)	C load s2 limb
	bdz	L(end)		C that was the last group

	addi	r4, r4, 16
	addi	r5, r5, 16

C Main loop: combine the group loaded previously while loading the next one,
C then store the four results.
	ALIGN(16)
L(top):	ADDSUBC	r28, r7, r6
	lwz	r6, 0(r4)	C load s1 limb
	lwz	r7, 0(r5)	C load s2 limb
	ADDSUBC	r29, r9, r8
	lwz	r8, 4(r4)	C load s1 limb
	lwz	r9, 4(r5)	C load s2 limb
	ADDSUBC	r30, r11, r10
	lwz	r10, 8(r4)	C load s1 limb
	lwz	r11, 8(r5)	C load s2 limb
	ADDSUBC	r31, r0, r12
	lwz	r12, 12(r4)	C load s1 limb
	lwz	r0, 12(r5)	C load s2 limb
	stw	r28, 0(r3)
	addi	r4, r4, 16
	stw	r29, 4(r3)
	addi	r5, r5, 16
	stw	r30, 8(r3)
	stw	r31, 12(r3)
	addi	r3, r3, 16
	bdnz	L(top)		C decrement ctr and loop back

C Combine and store the final group, which is already in registers.
L(end):	ADDSUBC	r28, r7, r6
	ADDSUBC	r29, r9, r8
	ADDSUBC	r30, r11, r10
	ADDSUBC	r31, r0, r12
	stw	r28, 0(r3)
	stw	r29, 4(r3)
	stw	r30, 8(r3)
	stw	r31, 12(r3)

C Restore the saved registers and build the return value from CA.
L(ret):	lwz	r31, -4(r1)
	lwz	r30, -8(r1)
	lwz	r29, -12(r1)
	lwz	r28, -16(r1)

	subfe	r3, r0, r0	C r3 = CA - 1, whatever r0 holds
	GENRVAL			C add: r3 + 1 = carry; sub: -r3 = borrow
	blr
EPILOGUE()