aors_n.asm revision 1.1.1.2
dnl  PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1999-2001, 2003-2005, 2007, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                cycles/limb
C POWER3/PPC630     1.5
C POWER4/PPC970     2
C POWER5            2
C POWER6            2.78
C POWER7            2.15-2.87

C This code is based on powerpc64/aors_n.asm.

C INPUT PARAMETERS
C rp   r3
C up   r4
C vp   r5
C n    r6
C cy   r7   (read only by the _nc entry point, via SETCBR)

C OVERVIEW
C For each of the n limbs, a limb is loaded from up and from vp, combined
C with ADDSUBC and stored to rp.  With OPERATION_add_n the result limb is
C u + v + carry; with OPERATION_sub_n it is u - v - borrow.  The value
C returned in r3 is derived from the final state of the CA bit.
C
C The CA bit is initialised once at entry and after that is written only by
C the ADDSUBC instructions, so the carry chain is unbroken from the first
C limb to the last.  None of the loads, stores, addi, rlwinm, srwi, cmpwi,
C mtctr or branch instructions used below alter CA.
C
C Register use:
C   r0, r6-r12   limb temporaries (r6 is free once n has been moved to ctr)
C   r28-r31      result limbs; saved in the 32-byte frame on entry and
C                restored before returning
C   ctr          number of 4-limb groups still to do, a partial group included
C   cr0, cr6     hold the n mod 4 dispatch conditions
C
C n = 0 is not handled: ctr would be loaded with zero and the bdz at L(go)
C would not terminate the operation.

C OPERATION MACROS
C   ADDSUBC   add or subtract with carry in and carry out
C   ADDSUB    carry-out-only variant; not referenced in the code visible here
C   GENRVAL   turns r3 = CA - 1 into the 0/1 return value
C   SETCBR    sets CA from a register holding the incoming carry or borrow:
C               add: CA = 1 exactly when the register is nonzero
C               sub: CA = 1 exactly when the register is zero (no borrow)
C   CLRCB     sets CA to the no-carry / no-borrow state:
C               add: r0 + 0 never carries, so CA = 0
C               sub: r1 + (-1) carries whenever r1 is nonzero, so CA = 1
C                    (r1 is the stack pointer)

ifdef(`OPERATION_add_n',`
  define(ADDSUBC,       adde)
  define(ADDSUB,        addc)
  define(func,          mpn_add_n)
  define(func_nc,       mpn_add_nc)
  define(GENRVAL,       `addi   r3, r3, 1')
  define(SETCBR,        `addic  r0, $1, -1')
  define(CLRCB,         `addic  r0, r0, 0')
')
ifdef(`OPERATION_sub_n',`
  define(ADDSUBC,       subfe)
  define(ADDSUB,        subfc)
  define(func,          mpn_sub_n)
  define(func_nc,       mpn_sub_nc)
  define(GENRVAL,       `neg    r3, r3')
  define(SETCBR,        `subfic r0, $1, 0')
  define(CLRCB,         `addic  r0, r1, -1')
')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ASM_START()

C Entry point with carry/borrow in: seed CA from r7, then share the body.
PROLOGUE(func_nc)
        SETCBR(r7)
        b       L(ent)
EPILOGUE()

C Entry point without carry/borrow in: start from the neutral CA state.
PROLOGUE(func)
        CLRCB
L(ent): stwu    r1, -32(r1)             C allocate a 32-byte frame
        rlwinm. r0, r6, 0,30,31         C r0 = n & 3, set cr0
        cmpwi   cr6, r0, 2              C cr6 = (n & 3) compared with 2
        stw     r28, 8(r1)
        addi    r6, r6, 3               C compute count...
        stw     r29, 12(r1)
        srwi    r6, r6, 2               C ...for ctr: (n + 3) / 4
        stw     r30, 16(r1)
        mtctr   r6                      C copy count into ctr
        stw     r31, 20(r1)
        beq     cr0, L(b00)             C n mod 4 = 0
        blt     cr6, L(b01)             C n mod 4 = 1
        beq     cr6, L(b10)             C n mod 4 = 2
                                        C fall through: n mod 4 = 3

C n mod 4 = 3: peel three limbs, then continue with whole groups of four.
L(b11): lwz     r8, 0(r4)               C load s1 limb
        lwz     r9, 0(r5)               C load s2 limb
        lwz     r10, 4(r4)              C load s1 limb
        lwz     r11, 4(r5)              C load s2 limb
        lwz     r12, 8(r4)              C load s1 limb
        addi    r4, r4, 12
        lwz     r0, 8(r5)               C load s2 limb
        addi    r5, r5, 12
        ADDSUBC r29, r9, r8
        ADDSUBC r30, r11, r10
        ADDSUBC r31, r0, r12
        stw     r29, 0(r3)
        stw     r30, 4(r3)
        stw     r31, 8(r3)
        addi    r3, r3, 12
        bdnz    L(go)                   C more groups of four remain
        b       L(ret)

C n mod 4 = 1: peel one limb.
L(b01): lwz     r12, 0(r4)              C load s1 limb
        addi    r4, r4, 4
        lwz     r0, 0(r5)               C load s2 limb
        addi    r5, r5, 4
        ADDSUBC r31, r0, r12            C add
        stw     r31, 0(r3)
        addi    r3, r3, 4
        bdnz    L(go)                   C more groups of four remain
        b       L(ret)

C n mod 4 = 2: peel two limbs.
L(b10): lwz     r10, 0(r4)              C load s1 limb
        lwz     r11, 0(r5)              C load s2 limb
        lwz     r12, 4(r4)              C load s1 limb
        addi    r4, r4, 8
        lwz     r0, 4(r5)               C load s2 limb
        addi    r5, r5, 8
        ADDSUBC r30, r11, r10           C add
        ADDSUBC r31, r0, r12            C add
        stw     r30, 0(r3)
        stw     r31, 4(r3)
        addi    r3, r3, 8
        bdnz    L(go)                   C more groups of four remain
        b       L(ret)

C n mod 4 = 0: nothing to peel.  CA was already set up at entry, so no
C carry initialisation is needed here.
L(b00):
C Load the first group of four limb pairs.  If it is also the last group,
C go straight to the tail; otherwise step the source pointers and enter the
C loop, which always works on limbs loaded one iteration earlier.
L(go):  lwz     r6, 0(r4)               C load s1 limb
        lwz     r7, 0(r5)               C load s2 limb
        lwz     r8, 4(r4)               C load s1 limb
        lwz     r9, 4(r5)               C load s2 limb
        lwz     r10, 8(r4)              C load s1 limb
        lwz     r11, 8(r5)              C load s2 limb
        lwz     r12, 12(r4)             C load s1 limb
        lwz     r0, 12(r5)              C load s2 limb
        bdz     L(end)                  C last group: skip the loop

        addi    r4, r4, 16
        addi    r5, r5, 16

C Main loop: combine the four limb pairs already in registers while loading
C the next four pairs, then store the four results.
        ALIGN(16)
L(top): ADDSUBC r28, r7, r6
        lwz     r6, 0(r4)               C load s1 limb
        lwz     r7, 0(r5)               C load s2 limb
        ADDSUBC r29, r9, r8
        lwz     r8, 4(r4)               C load s1 limb
        lwz     r9, 4(r5)               C load s2 limb
        ADDSUBC r30, r11, r10
        lwz     r10, 8(r4)              C load s1 limb
        lwz     r11, 8(r5)              C load s2 limb
        ADDSUBC r31, r0, r12
        lwz     r12, 12(r4)             C load s1 limb
        lwz     r0, 12(r5)              C load s2 limb
        stw     r28, 0(r3)
        addi    r4, r4, 16
        stw     r29, 4(r3)
        addi    r5, r5, 16
        stw     r30, 8(r3)
        stw     r31, 12(r3)
        addi    r3, r3, 16
        bdnz    L(top)                  C decrement ctr and loop back

C Tail: combine and store the last four limb pairs, which are already loaded.
L(end): ADDSUBC r28, r7, r6
        ADDSUBC r29, r9, r8
        ADDSUBC r30, r11, r10
        ADDSUBC r31, r0, r12
        stw     r28, 0(r3)
        stw     r29, 4(r3)
        stw     r30, 8(r3)
        stw     r31, 12(r3)

C Restore the saved registers, build the return value from CA, and pop the
C frame.  The contents of r0 do not matter here, since subfe of a register
C with itself depends only on CA.
L(ret):
        lwz     r28, 8(r1)
        lwz     r29, 12(r1)
        subfe   r3, r0, r0              C r3 = CA - 1, that is 0 or -1
        lwz     r30, 16(r1)
        GENRVAL                         C add: r3 + 1 = carry; sub: -r3 = borrow
        lwz     r31, 20(r1)
        addi    r1, r1, 32
        blr
EPILOGUE()