dnl  IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
dnl  the result from a second limb vector.

dnl  Copyright 1992, 1994, 1999-2001 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


dnl  INPUT PARAMETERS
dnl  res_ptr	r3
dnl  s1_ptr	r4
dnl  size	r5
dnl  s2_limb	r6

dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
dnl  instruction.  To obtain that operation, we have to use the 32x32->64
dnl  signed multiplication instruction, and add the appropriate compensation to
dnl  the high limb of the result.  We add the multiplicand if the multiplier
dnl  has its most significant bit set, and we add the multiplier if the
dnl  multiplicand has its most significant bit set.  We need to preserve the
dnl  carry flag between each iteration, so we have to compute the compensation
dnl  carefully (the natural srai+and sequence doesn't work).
dnl  Since all POWER can branch in zero cycles, we use conditional branches for
dnl  the compensation.

include(`../config.m4')

C mpn_submul_1 -- {res_ptr,size} -= {s1_ptr,size} * s2_limb, one 32-bit limb
C at a time.  On exit r3 holds the final high (carry) limb plus the last
C borrow; presumably this is the function result -- confirm against the ABI.
C
C The first limb is loaded, multiplied and subtracted before the count
C register is tested, so the code as written requires size >= 1.
C
C Register roles, as used below:
C   r3   res_ptr, pre-biased by -4 so that every access uses offset 4
C   r4   s1_ptr, advanced by the update-form load (lu)
C   r6   s2_limb (never modified)
C   r0   current s1 limb; in the Lploop path it is then reused for the low
C        product limb, in the Lnloop path it keeps the s1 limb
C   r7   current res limb (also low product limb scratch in Lnloop)
C   r8   result limb waiting to be stored
C   r9   high product limb (cy_limb), alternating with r10 in the
C   r10  two halves of the 2-way unrolled loops
C   r11  low limb + cy_limb + carry-in; junk after each "a 11,8,11"
C   ctr  number of limbs remaining
C   cr0  sign of s2_limb at entry, then sign of the current s1 limb
C
C Carry invariant: after each sf/a pair the carry bit is 1 exactly when the
C subtraction res_limb - r11 borrowed.  That borrow is consumed by the next
C ae, or by the final aze.
C
C Two loop variants exist.  Lploop is used when s2_limb is non-negative as a
C signed number, Lnloop when it is negative; Lnloop additionally adds the s1
C limb to every high product limb, as described in the header comment.

ASM_START()
PROLOGUE(mpn_submul_1)
	cal	3,-4(3)		C bias res_ptr by -4
	l	0,0(4)		C r0 = first s1 limb
	cmpi	0,6,0		C cr0 = sign of s2_limb, tested by blt below
	mtctr	5		C ctr = size
	mul	9,0,6		C signed multiply: r9 = high limb, MQ = low limb
C Compensation for a negative first s1 limb.  srai+and is usable here since
C no carry is live yet; the sf/a pair below defines the carry bit afresh.
	srai	7,0,31		C r7 = all ones if s1 limb is negative, else 0
	and	7,7,6		C r7 = s2_limb if s1 limb is negative, else 0
	mfmq	11		C r11 = low product limb
	cax	9,9,7		C high limb += compensation
	l	7,4(3)		C r7 = first res limb
	sf	8,11,7		C r8 = res_limb - low product limb
	a	11,8,11		C invert cy (r11 is junk): carry set iff sf borrowed
	blt	Lneg		C s2_limb negative: use the Lnloop variant
Lpos:	bdz	Lend		C single limb: store it and return

Lploop:	lu	0,4(4)		C r0 = next s1 limb, s1_ptr += 4
	stu	8,4(3)		C store previous result limb, res_ptr += 4
	cmpi	0,0,0		C cr0 = sign of s1 limb (before r0 is reused)
	mul	10,0,6		C r10 = high limb, MQ = low limb
	mfmq	0		C r0 = low product limb
	ae	11,0,9		C low limb + old_cy_limb + old cy
	l	7,4(3)		C r7 = res limb
	aze	10,10		C propagate cy to new cy_limb
	sf	8,11,7		C r8 = res_limb - r11
	a	11,8,11		C invert cy (r11 is junk)
	bge	Lp0
	cax	10,10,6		C adjust high limb for negative limb from s1
Lp0:	bdz	Lend0		C finished with the cy_limb in r10
	lu	0,4(4)		C second half: same steps with r9 and r10 swapped
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	0
	ae	11,0,10		C low limb + old_cy_limb + old cy
	l	7,4(3)
	aze	9,9		C propagate cy to new cy_limb
	sf	8,11,7		C r8 = res_limb - r11
	a	11,8,11		C invert cy (r11 is junk)
	bge	Lp1
	cax	9,9,6		C adjust high limb for negative limb from s1
Lp1:	bdn	Lploop		C loop while limbs remain

	b	Lend		C finished with the cy_limb in r9

Lneg:	cax	9,9,0		C s2_limb negative: add first s1 limb to high limb
	bdz	Lend		C single limb: store it and return
Lnloop:	lu	0,4(4)		C r0 = next s1 limb, s1_ptr += 4
	stu	8,4(3)		C store previous result limb, res_ptr += 4
	cmpi	0,0,0		C cr0 = sign of s1 limb
	mul	10,0,6		C r10 = high limb, MQ = low limb
	mfmq	7		C r7 = low product limb (r0 keeps the s1 limb)
	ae	11,7,9		C low limb + old_cy_limb + old cy
	l	7,4(3)		C r7 = res limb
	ae	10,10,0		C propagate cy to new cy_limb, and add s1 limb
	sf	8,11,7		C r8 = res_limb - r11
	a	11,8,11		C invert cy (r11 is junk)
	bge	Ln0
	cax	10,10,6		C adjust high limb for negative limb from s1
Ln0:	bdz	Lend0		C finished with the cy_limb in r10
	lu	0,4(4)		C second half: same steps with r9 and r10 swapped
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	7
	ae	11,7,10		C low limb + old_cy_limb + old cy
	l	7,4(3)
	ae	9,9,0		C propagate cy to new cy_limb, and add s1 limb
	sf	8,11,7		C r8 = res_limb - r11
	a	11,8,11		C invert cy (r11 is junk)
	bge	Ln1
	cax	9,9,6		C adjust high limb for negative limb from s1
Ln1:	bdn	Lnloop		C loop while limbs remain
	b	Lend		C finished with the cy_limb in r9

Lend0:	cal	9,0(10)		C move the cy_limb from r10 to r9
Lend:	st	8,4(3)		C store the last result limb
	aze	3,9		C r3 = cy_limb + final borrow
	br
EPILOGUE(mpn_submul_1)