dnl  POWER9 mpn_addmul_1 and mpn_submul_1.

dnl  Copyright 2018 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   mpn_addmul_1    mpn_submul_1
C                   cycles/limb     cycles/limb
C POWER3/PPC630            -               -
C POWER4/PPC970            -               -
C POWER5                   -               -
C POWER6                   -               -
C POWER7                   -               -
C POWER8                   -               -
C POWER9                  2.63            2.63

C INPUT PARAMETERS
C   rp  pointer to the limbs that are read, updated and written back
C   up  pointer to the limbs that are multiplied by v0
C   n   limb count; the code loads 0(up) unconditionally, so n >= 1 is
C       presumed (TODO confirm against the callers)
C   v0  the single multiplier limb
define(`rp', `r3')
define(`up', `r4')
define(`n',  `r5')
define(`v0', `r6')


C Operation selection.  The same instruction stream serves both functions:
C   ADDSUBC  add/subtract that consumes and produces CA (used in the chain)
C   ADDSUB   add/subtract that only produces CA (used for the n = 1 case)
C   func     the symbol name handed to PROLOGUE
C   AM(x)    expands to x in the addmul_1 build and to nothing otherwise
C   SM(x)    expands to x in the submul_1 build and to nothing otherwise
C Presumably exactly one of the OPERATION_ symbols is defined per build;
C verify against the build setup.
ifdef(`OPERATION_addmul_1',`
  define(`ADDSUBC',	adde)
  define(`ADDSUB',	addc)
  define(`func',	mpn_addmul_1)
  define(`AM',		`$1')
  define(`SM',		`')
')
ifdef(`OPERATION_submul_1',`
  define(`ADDSUBC',	subfe)
  define(`ADDSUB',	subfc)
  define(`func',	mpn_submul_1)
  define(`AM',		`')
  define(`SM',		`$1')
')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

C func (rp, up, n, v0)
C
C Multiplies the n limbs at up by v0 and adds the product to (addmul_1) or
C subtracts it from (submul_1) the n limbs at rp, storing the result back at
C rp.  On exit r3 holds the most significant product limb plus the final
C carry (addmul_1) or borrow (submul_1).
C
C Two carry chains run side by side:
C   OV  links the high half of one product to the low half of the next,
C       through addex with a last operand of 0
C   CA  links the add or subtract against the rp limbs, through ADDSUBC
C
C Register use inside the loop:
C   r8, r9     up limbs, loaded alternately
C   r0, r5     low and high product halves for the limb held in r8
C   r7, r12    low and high product halves for the limb held in r9
C              (the bx0 and bx1 entry code also uses both pairs for up[0])
C   r11        current rp limb
C   r10        result limb to store
C
C The loop is unrolled four ways.  CTR is set to n / 4 and the entry code
C jumps into the unrolled body at a slot chosen from n mod 4, after biasing
C rp (and up) so the fixed displacements in the body line up.
C
C Registers written: r0, r3-r5, r7-r12, CTR, CR0, CR7 and XER carry/overflow
C bits.  Nothing is stored to the stack.

ASM_START()
PROLOGUE(func)
	cmpdi	cr7, n, 3		C cr7: n compared with 3, used for small n
	srdi	r10, n, 2
	mtctr	r10			C CTR = n / 4
	rldicl.	r9, n, 0, 63		C cr0: low bit of n
	ld	r11, 0(up)		C r11 = up[0]
	bne	cr0, L(bx1)		C n odd

C n even.  cr0 is recomputed from bit 1 of n; r9 is only a scratch target
C here and is reloaded below.
L(bx0):	rldicl.	r9, n, 63, 63
AM(`	subfzeo	r12, n		')	C ov = 0, ca = 0
AM(`	li	r12, 0		')
SM(`	subfco	r12, r12, r12	')	C r12 = 0, ov = 0, ca = 1
	ld	r9, 8(up)		C r9 = up[1]
	mulld	r0, r11, v0		C r0 = low half of up[0] * v0
	mulhdu	r5, r11, v0		C r5 = high half; n is dead from here on
	blt	cr7, L(2)		C n = 2
	ld	r8, 16(up)		C r8 = up[2]
	bne	cr0, L(b10)		C bit 1 of n set: n mod 4 = 2

C n mod 4 = 0
L(b00):	addi	rp, rp, -24
	b	L(lo0)
C n mod 4 = 2 and n >= 6
L(b10):	addi	rp, rp, -8
	addi	up, up, 16
	b	L(lo2)

C n = 2: only the two-limb tail is needed
L(2):	addi	rp, rp, -8
	b	L(cj2)

C n odd.  cr0 is recomputed from bit 1 of n.
L(bx1):	rldicl.	r9, n, 63, 63
AM(`	subfzeo	r5, n		')	C ov = 0, ca = 0
AM(`	li	r5, 0		')
SM(`	subfco	r5, r5, r5	')	C r5 = 0, ov = 0, ca = 1
	blt	cr7, L(1)		C n = 1
	ld	r8, 8(up)		C r8 = up[1]
	mulld	r7, r11, v0		C r7 = low half of up[0] * v0
	mulhdu	r12, r11, v0		C r12 = high half of up[0] * v0
	ld	r9, 16(up)		C r9 = up[2]
	bne	cr0, L(b11)		C bit 1 of n set: n mod 4 = 3

C n mod 4 = 1 and n >= 5
L(b01):	addi	rp, rp, -16
	addi	up, up, 8
	b	L(lo1)

C n = 1: a single multiply and one add or subtract, no OV chain involved
L(1):	mulld	r7, r11, v0
	mulhdu	r12, r11, v0
	ld	r11, 0(rp)
	ADDSUB	r10, r7, r11
	std	r10, 0(rp)
AM(`	addze	r3, r12		')	C r3 = high half + carry
SM(`	subfe	r0, r0, r0	')	C r0 = CA - 1, that is 0 or -1
SM(`	sub	r3, r12, r0	')	C r3 = high half + borrow
	blr

C n mod 4 = 3
L(b11):	addi	up, up, 24
	ble	cr7, L(end)		C n = 3: the tail alone handles it

	ALIGN(16)
C Main loop, four limbs per iteration.  Each slot loads an rp limb, starts
C the multiply for a later limb, folds the previous high half into the
C current low half through OV, then applies ADDSUBC through CA and stores.
L(top):	ld	r11, 0(rp)
	mulld	r0, r8, v0
	addex(	r7, r7, r5, 0)
	mulhdu	r5, r8, v0
	ld	r8, 0(up)
	ADDSUBC	r10, r7, r11
	std	r10, 0(rp)
L(lo2):	ld	r11, 8(rp)
	mulld	r7, r9, v0
	addex(	r0, r0, r12, 0)
	mulhdu	r12, r9, v0
	ld	r9, 8(up)
	ADDSUBC	r10, r0, r11
	std	r10, 8(rp)
L(lo1):	ld	r11, 16(rp)
	mulld	r0, r8, v0
	addex(	r7, r7, r5, 0)
	mulhdu	r5, r8, v0
	ld	r8, 16(up)
	ADDSUBC	r10, r7, r11
	std	r10, 16(rp)
L(lo0):	ld	r11, 24(rp)
	mulld	r7, r9, v0
	addex(	r0, r0, r12, 0)
	mulhdu	r12, r9, v0
	ld	r9, 24(up)
	ADDSUBC	r10, r0, r11
	std	r10, 24(rp)
	addi	up, up, 32
	addi	rp, rp, 32
	bdnz	L(top)

C Tail: the last three limbs.  Same pattern as the loop slots but with no
C further loads from up.
L(end):	ld	r11, 0(rp)
	mulld	r0, r8, v0
	addex(	r7, r7, r5, 0)
	mulhdu	r5, r8, v0
	ADDSUBC	r10, r7, r11
	std	r10, 0(rp)
C Entry for n = 2: the last two limbs
L(cj2):	ld	r11, 8(rp)
	mulld	r7, r9, v0
	addex(	r0, r0, r12, 0)
	mulhdu	r12, r9, v0
	ADDSUBC	r10, r0, r11
	std	r10, 8(rp)
	ld	r11, 16(rp)
	addex(	r7, r7, r5, 0)
	ADDSUBC	r10, r7, r11
	std	r10, 16(rp)
C Fold both pending carries into the result left in r3
	li	r0, 0
	addex(	r3, r12, r0, 0)		C r3 = last high half + OV
AM(`	addze	r3, r3		')	C add the CA carry
SM(`	subfe	r0, r0, r0	')	C r0 = CA - 1, that is 0 or -1
SM(`	sub	r3, r3, r0	')	C add the borrow
	blr
EPILOGUE()