dnl  rshift.asm revision 1.1.1.1
dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt

dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 2
C POWER6                 3.5  (mysteriously 3.0 for cnt=1)

C TODO
C  * Micro-optimise header code
C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4248
C    bytes, 4-way code would become about 50% larger.

C ---------------------------------------------------------------------------
C mpn_rshift (rp, up, n, cnt)
C
C Shift the n-limb operand at up right by cnt bits and store the n result
C limbs at rp.  The bits shifted out of the least significant limb are
C returned in the high end of the return value, i.e. up[0] << (64-cnt).
C
C In:    r3 = rp, r4 = up, r5 = n, r6 = cnt
C Out:   r3 = return value (under HAVE_ABI_mode32 it is split, with the high
C        32 bits left in r3 and the full 64-bit value copied to r4)
C Uses:  r0 (tnc), r7 (rp copy), r8-r11 (data and dispatch scratch),
C        r12 (saved LR), CTR (loop count), LR (computed branch target), cr0
C
C Constraints visible in this code:
C  * cnt must be in 1..63, since one code block is generated for each of those
C    values and the entry point is computed from cnt without a range check.
C  * n must be at least 1.  n = 1 has its own path; n = 0 is not handled (the
C    even path loads two limbs unconditionally).
C
C Method: every shift count has its own 64-byte block with the count encoded
C as an immediate.  The header computes the address of the block for cnt and
C jumps into it through LR, entering at L(oN) for odd n or L(eN) for even n.
C The loop is unrolled two limbs per iteration.
C ---------------------------------------------------------------------------

C INPUT PARAMETERS
define(`rp_param',  `r3')
define(`up',  `r4')
define(`n',   `r5')
define(`cnt', `r6')

define(`tnc',`r0')		C 64 - cnt
define(`retval',`r3')		C same register as rp_param, hence the rp copy
define(`rp',  `r7')

ASM_START()
PROLOGUE(mpn_rshift)

ifdef(`HAVE_ABI_mode32',`
	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
')
	mflr	r12			C save return address, LR is reused below
	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
	add	r11, r11, r10		C L(e1) + 64*cnt, adjusted below to L(oN) or L(eN), N = cnt
	srdi	r10, n, 1		C loop count = n / 2
	mr	rp, rp_param		C free r3 for the return value
	subfic	tnc, cnt, 64
	rlwinm.	r8, n, 0,31,31		C extract bit 0
	mtctr	r10
	beq	L(evn)			C n even

C n odd: r9 = up[0].  For n > 1 also preload r8 = up[1] and enter at L(oN).
L(odd):	ld	r9, 0(up)
	cmpdi	cr0, n, 1		C n = 1?
	beq	L(1)
	ld	r8, 8(up)
	addi	r11, r11, -84		C L(o1) - L(e1) - 64
	mtlr	r11
	sld	r3, r9, tnc		C retval
	addi	up, up, 8
	addi	rp, rp, 8
	blr				C branch to L(oN)

C n even: r8 = up[0], r9 = up[1], enter at L(eN).
L(evn):	ld	r8, 0(up)
	ld	r9, 8(up)
	addi	r11, r11, -64		C back up one block, giving L(eN) for N = cnt
	mtlr	r11
	sld	r3, r8, tnc		C retval
	addi	up, up, 16
	blr				C branch to L(eN)

C n = 1: single limb, handled with variable-count shifts, no dispatch.
L(1):	sld	r3, r9, tnc		C retval
	srd	r8, r9, cnt
	std	r8, 0(rp)
	mtlr	r12			C restore return address
C Under HAVE_ABI_mode32 the 64-bit return value is split across r3 and r4.
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3
	srdi	r3, r3, 32
')
	blr


C SHIFT(N) emits the code block for a shift count of N.  Each block must be
C exactly 16 instructions (64 bytes), with L(oN) at byte offset 12 and L(eN)
C at byte offset 32, because the header computes entry addresses from L(e1)
C using the constants 64, -64 and -84.  The two trailing nops are padding to
C reach that size.  Do not add, remove or reorder instructions here without
C updating the header.
C
C Register use inside a block: r8 and r9 hold two consecutive source limbs,
C r10 and r11 hold result limbs waiting to be stored.
define(SHIFT,`
L(lo$1):ld	r8, 0(up)
	std	r11, 0(rp)
	addi	rp, rp, 16
L(o$1):	srdi	r10, r9, $1
	rldimi	r10, r8, eval(64-$1), 0
	ld	r9, 8(up)
	addi	up, up, 16
	std	r10, -8(rp)
L(e$1):	srdi	r11, r8, $1
	rldimi	r11, r9, eval(64-$1), 0
	bdnz	L(lo$1)
	std	r11, 0(rp)
	srdi	r10, r9, $1
	b	L(com)
	nop
	nop
')

	ALIGN(64)
forloop(`i',1,63,`SHIFT(i)')

C Common tail: store the last result limb (r10 = last source limb >> cnt),
C restore the return address saved in r12 and return.
L(com):	std	r10, 8(rp)
	mtlr	r12
C Under HAVE_ABI_mode32 the 64-bit return value is split across r3 and r4.
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3
	srdi	r3, r3, 32
')
	blr
EPILOGUE()
ASM_END()