dnl  PowerPC-64 mpn_add_n, mpn_sub_n optimised for POWER7.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                  cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 ?
C POWER6                 ?
C POWER7                 2.18

C This is a tad bit slower than the cnd_aors_n.asm code, which is of course an
C anomaly.
C Operation selection.  The macros below are defined only when the matching
C OPERATION_xxx symbol is defined; presumably the build defines exactly one
C of the two for each object it produces.
C
C   ADDSUBC  limb operation that consumes and produces CA (adde / subfe)
C   ADDSUB   variant that only produces CA (addc / subfc); it is defined
C            here but not referenced by the code in this file
C   func     entry point that starts without carry or borrow
C   func_nc  entry point that starts from a carry value held in a register
C   GENRVAL  converts r3 = CA - 1, as computed at the end label, into the
C            0 or 1 return value
C   SETCBR   sets CA from the register passed as argument
C   CLRCB    puts CA into the "no carry" / "no borrow" state
C
C CA polarity: for addition CA = 1 means carry.  For subtraction the subfe
C chain treats CA = 1 as NO borrow, hence:
C   - add SETCBR: reg + -1 carries exactly when reg is nonzero
C   - sub SETCBR: 0 - reg leaves CA = 1 exactly when reg is zero
C   - add CLRCB:  r0 + 0 never carries, so CA = 0
C   - sub CLRCB:  r1 + -1 carries whenever r1 is nonzero; r1 is the stack
C                 pointer under the PowerPC ELF ABIs, so CA = 1
C r0 is only a scratch destination in all four cases.

ifdef(`OPERATION_add_n',`
  define(ADDSUBC,       adde)
  define(ADDSUB,        addc)
  define(func,          mpn_add_n)
  define(func_nc,       mpn_add_nc)
  define(GENRVAL,       `addi   r3, r3, 1')
  define(SETCBR,        `addic  r0, $1, -1')
  define(CLRCB,         `addic  r0, r0, 0')
')
ifdef(`OPERATION_sub_n',`
  define(ADDSUBC,       subfe)
  define(ADDSUB,        subfc)
  define(func,          mpn_sub_n)
  define(func_nc,       mpn_sub_nc)
  define(GENRVAL,       `neg    r3, r3')
  define(SETCBR,        `subfic r0, $1, 0')
  define(CLRCB,         `addic  r0, r1, -1')
')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

C INPUT PARAMETERS
C   rp  pointer the result limbs are stored through (std)
C   up  pointer to the first source operand (ld)
C   vp  pointer to the second source operand (ld)
C   n   limb count; limbs are 8 bytes.  n = 0 is not handled: ctr would be
C       loaded with 0 and the bdnz loop would not terminate as intended.
C For the subtract flavour the result is {up} - {vp}.
define(`rp',    `r3')
define(`up',    `r4')
define(`vp',    `r5')
define(`n',     `r6')

C func_nc -- same operation as func, but seeded from a carry value.
C The value is taken from r7; presumably that is the fifth argument
C (carry-in) of the C-level prototype -- confirm against the header.
C Any nonzero r7 counts as an incoming carry (add) or borrow (sub).
C After CA is set up, control joins the shared body inside func.
ASM_START()
PROLOGUE(func_nc)
        SETCBR(r7)
        b       L(ent)
EPILOGUE()

C func -- rp[0..n-1] = up[0..n-1] ADDSUBC vp[0..n-1], least significant limb
C first, returning the carry (add) or borrow (sub) out as 0 or 1 in r3.
C
C Register use: r0 = scratch and loop-count staging, r6-r11 = limb
C temporaries (r6 doubles as n until the loop starts), cr0/cr6 = branch
C conditions, ctr = loop counter.
C
C The carry chain lives in CA from entry to the final subfe.  Everything
C placed between the ADDSUBC instructions (ld, std, addi, andi., cmpldi,
C srdi, mtctr, branches) leaves CA untouched, which is what makes the
C interleaving below valid.
PROLOGUE(func)
        CLRCB
L(ent):
        andi.   r7, n, 1                C odd limb count?
        beq     L(bx0)

C Odd n: peel one limb so that an even number of limbs remains.
L(bx1): ld      r7, 0(up)
        ld      r9, 0(vp)
        ADDSUBC r11, r9, r7
        std     r11, 0(rp)
        cmpldi  cr6, n, 1
        beq     cr6, L(end)             C n = 1: nothing left to do
        addi    up, up, 8
        addi    vp, vp, 8
        addi    rp, rp, 8

C Even remainder.  ctr = (n + 2) >> 2 counts passes through bdnz.  A full
C pass (top then mid) handles 4 limbs.  When bit 1 of n is set, the first
C pass enters at mid and handles only 2.  Bit 0 of n, already dealt with
C above, affects neither computation.
L(bx0): addi    r0, n, 2        C compute branch...
        srdi    r0, r0, 2       C ...count
        mtctr   r0

        andi.   r7, n, 2
        bne     L(mid)

C Entering at top: bias the pointers by two limbs, since top addresses its
C limbs at offsets -16 and -8 while mid uses 0 and 8.
        addi    up, up, 16
        addi    vp, vp, 16
        addi    rp, rp, 16

        ALIGN(32)
L(top): ld      r6, -16(up)
        ld      r7, -8(up)
        ld      r8, -16(vp)
        ld      r9, -8(vp)
        ADDSUBC r10, r8, r6
        ADDSUBC r11, r9, r7
        std     r10, -16(rp)
        std     r11, -8(rp)
L(mid): ld      r6, 0(up)
        ld      r7, 8(up)
        ld      r8, 0(vp)
        ld      r9, 8(vp)
        ADDSUBC r10, r8, r6
        ADDSUBC r11, r9, r7
        std     r10, 0(rp)
        std     r11, 8(rp)
        addi    up, up, 32
        addi    vp, vp, 32
        addi    rp, rp, 32
        bdnz    L(top)

C subfe with identical source registers gives ~r0 + r0 + CA = CA - 1, that
C is 0 when CA = 1 and -1 when CA = 0.  GENRVAL maps this to the return
C value: add does +1 (CA itself), sub negates (1 when a borrow occurred).
L(end): subfe   r3, r0, r0      C -cy
        GENRVAL
        blr
EPILOGUE()