1dnl PowerPC 750 mpn_rshift -- mpn right shift. 2 3dnl Copyright 2002, 2003 Free Software Foundation, Inc. 4 5dnl This file is part of the GNU MP Library. 6 7dnl The GNU MP Library is free software; you can redistribute it and/or modify 8dnl it under the terms of the GNU Lesser General Public License as published 9dnl by the Free Software Foundation; either version 3 of the License, or (at 10dnl your option) any later version. 11 12dnl The GNU MP Library is distributed in the hope that it will be useful, but 13dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 15dnl License for more details. 16 17dnl You should have received a copy of the GNU Lesser General Public License 18dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 19 20include(`../config.m4') 21 22 23C cycles/limb 24C 750: 3.0 25C 7400: 3.0 26 27 28C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size, 29C unsigned shift); 30C 31C This code is the same per-limb speed as mpn/powerpc32/rshift.asm, but 32C smaller and saving about 30 or so cycles of overhead. 
ASM_START()
PROLOGUE(mpn_rshift)

	C Shift the size-limb operand at src right by shift bits and store
	C size limbs at dst.  The value returned in r3 is
	C src[0] << (32-shift), i.e. the bits shifted out of the low limb,
	C placed at the high end of the return limb.
	C
	C Limbs are read and written from index 0 upwards.  Each result limb
	C is
	C
	C	dst[i] = (src[i] >> shift) | (src[i+1] << (32-shift))
	C
	C except the top one, which is just src[size-1] >> shift.
	C
	C The main loop is unrolled to two limbs per pass, with r9 and r10
	C swapping roles as "current limb" and "next limb" so that no register
	C move is needed.  L(two) and L(odd) are the matching tails for the
	C two possible exit points.
	C
	C No check of size or shift is made here.
	C NOTE(review): presumably callers guarantee size >= 1 and
	C 1 <= shift <= 31 -- confirm against the mpn_rshift interface.

	C r3	dst
	C r4	src
	C r5	size
	C r6	shift

	C CTR starts at size.  Every bdz/bdnz below decrements it, and it
	C reaches zero right after src[size-1] has been loaded.
	mtctr	r5		C size
	lwz	r8, 0(r4)	C src[0]

	subfic	r7, r6, 32	C 32-shift
	C r5 is reused as the dst pointer, biased by one limb so that the
	C update-form store stwu ..., 4(r5) writes dst[0] first.
	addi	r5, r3, -4	C dst-4

	C r3 (dst) is dead from here on and holds the return value.
	slw	r3, r8, r7	C return value
	bdz	L(one)		C size == 1, nothing more to load

	lwzu	r9, 4(r4)	C src[1]
	srw	r8, r8, r6	C src[0] >> shift
	bdz	L(two)		C size == 2, src[1] is the top limb


L(top):
	C r3	return value
	C r4	src, incrementing
	C r5	dst, incrementing
	C r6	shift
	C r7	32-shift
	C r8	src[i-1] >> shift
	C r9	src[i]
	C r10

	C First half: r9 is the current limb, r10 receives the next one.
	lwzu	r10, 4(r4)
	slw	r11, r9, r7	C low bits of src[i], moved to the high end

	or	r8, r8, r11	C completed limb dst[i-1]
	stwu	r8, 4(r5)	C advance dst by one limb and store

	srw	r8, r9, r6	C start the next result limb
	bdz	L(odd)		C r10 holds the top limb, finish in L(odd)

	C r8	src[i-1] >> shift
	C r9
	C r10	src[i]

	C Second half: same steps with the roles of r9 and r10 exchanged.
	lwzu	r9, 4(r4)
	slw	r11, r10, r7

	or	r8, r8, r11
	stwu	r8, 4(r5)

	srw	r8, r10, r6
	bdnz	L(top)		C falls through to L(two) with top limb in r9


L(two):
	C r3	return value
	C r4
	C r5	&dst[size-3], so 4(r5) is dst[size-2] and 8(r5) is dst[size-1]
	C r6	shift
	C r7	32-shift
	C r8	src[size-2] >> shift
	C r9	src[size-1]
	C r10

	C Tail with the top limb in r9.  Both remaining limbs are stored with
	C plain stw since r5 is not needed afterwards.
	slw	r11, r9, r7
	srw	r12, r9, r6	C src[size-1] >> shift

	or	r8, r8, r11
	stw	r12, 8(r5)	C dst[size-1]

	stw	r8, 4(r5)	C dst[size-2]
	blr


L(odd):
	C r3	return value
	C r4
	C r5	&dst[size-3], so 4(r5) is dst[size-2] and 8(r5) is dst[size-1]
	C r6	shift
	C r7	32-shift
	C r8	src[size-2] >> shift
	C r9
	C r10	src[size-1]

	C Tail with the top limb in r10, otherwise identical to L(two).
	slw	r11, r10, r7
	srw	r12, r10, r6	C src[size-1] >> shift

	or	r8, r8, r11
	stw	r12, 8(r5)	C dst[size-1]

	stw	r8, 4(r5)	C dst[size-2]
	blr


L(one):
	C r3	return value
	C r4
	C r5	dst-4
	C r6	shift
	C r7
	C r8	src[0]

	C Single limb: the result is just src[0] >> shift.
	srw	r8, r8, r6

	stw	r8, 4(r5)	C dst[0]
	blr

EPILOGUE(mpn_rshift)