dnl  PowerPC-32 mpn_rshift -- Shift a number right.

dnl  Copyright 1995, 1998, 2000, 2002-2005 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                    cycles/limb
C 603e:                  ?
C 604e:                  3.0
C 75x (G3):              3.0
C 7400,7410 (G4):        3.0
C 7445,7455 (G4+):       2.5
C 7447,7457 (G4+):       2.25
C power4/ppc970:         2.5
C power5:                2.5

C INPUT PARAMETERS
C rp    r3
C up    r4
C n     r5
C cnt   r6

C OVERVIEW
C Every result limb is built as
C       rp[i] = (up[i] >> cnt) | (up[i+1] << (32-cnt))
C and the most significant result limb is just up[n-1] >> cnt.  The value
C returned in r3 is up[0] << (32-cnt), that is the bits shifted out at the
C low end, left-justified in the limb.
C
C Register roles common to both paths:
C       r3      return value once computed, rp on entry
C       r4      read pointer into up, advanced by the update-form loads
C       r6      cnt
C       r7      write pointer, kept 4 bytes below the next limb to store
C       r8      32-cnt
C
C Operands of up to 30 limbs use a simple loop needing only volatile
C registers.  Larger operands use a 4-way unrolled loop that additionally
C needs r24-r31, which are saved in a 48-byte stack frame.
C
C NOTE(review): n = 0 is not handled on the small path, where CTR would
C wrap.  Presumably callers guarantee n >= 1 and 1 <= cnt <= 31; confirm
C against the mpn interface documentation.

ASM_START()
PROLOGUE(mpn_rshift)
        cmpwi   cr0, r5, 30     C more than 30 limbs?
        addi    r7, r3, -4      C dst-4
        bgt     L(BIG)          C branch if more than 30 limbs

        mtctr   r5              C copy size into CTR
        subfic  r8, r6, 32      C r8 = 32-cnt
        lwz     r11, 0(r4)      C load first s1 limb
        slw     r3, r11, r8     C compute function return value
        bdz     L(end1)         C n = 1: only the top limb remains

C Two result limbs per trip.  r11 and r10 alternate between previous limb
C and newly loaded limb, so no register copy is needed.  The exit taken
C tells which of the two holds the most significant source limb.
L(oop): lwzu    r10, 4(r4)
        srw     r9, r11, r6
        slw     r12, r10, r8
        or      r9, r9, r12
        stwu    r9, 4(r7)
        bdz     L(end2)         C last limb loaded is in r10
        lwzu    r11, 4(r4)
        srw     r9, r10, r6
        slw     r12, r11, r8
        or      r9, r9, r12
        stwu    r9, 4(r7)
        bdnz    L(oop)          C fall through: last limb loaded is in r11

L(end1):
        srw     r0, r11, r6     C top result limb, nothing to merge in
        stw     r0, 4(r7)
        blr
L(end2):
        srw     r0, r10, r6     C top result limb, nothing to merge in
        stw     r0, 4(r7)
        blr

C Large operand path.  Throughout it r0 carries the already shifted low
C part, previous limb >> cnt, of the result limb that is completed next.
L(BIG):
        stwu    r1, -48(r1)     C allocate a 48-byte frame
        stmw    r24, 8(r1)      C save registers we are supposed to preserve
        lwz     r9, 0(r4)       C r9 = up[0]
        subfic  r8, r6, 32      C r8 = 32-cnt
        slw     r3, r9, r8      C compute function return value
        srw     r0, r9, r6      C pending low part of rp[0]
        addi    r5, r5, -1      C r5 = n-1 limbs still to be read

C Lead-in: complete (n-1) mod 4 result limbs one at a time so that the
C number of limbs left to read is a multiple of four.
        andi.   r10, r5, 3      C count for spill loop
        beq     L(e)            C nothing to do, already a multiple of four
        mtctr   r10
        lwzu    r28, 4(r4)
        bdz     L(xe0)          C exactly one lead-in limb

C CTR was 1..3 and the bdz above has consumed one count, so this body is
C executed at most twice.
L(loop0):
        srw     r12, r28, r6
        slw     r24, r28, r8
        lwzu    r28, 4(r4)
        or      r24, r0, r24
        stwu    r24, 4(r7)
        mr      r0, r12
        bdnz    L(loop0)        C taken at most once!

C Last lead-in limb: same as the loop body but without a further load.
L(xe0): srw     r12, r28, r6
        slw     r24, r28, r8
        or      r24, r0, r24
        stwu    r24, 4(r7)
        mr      r0, r12

C Unrolled part.  r5 >> 2 is the number of 4-limb groups left.  One is
C subtracted because the last group is finished after the loop without
C further loads.  r28-r31 are preloaded with the first group.
L(e):   srwi    r5, r5, 2       C count for unrolled loop
        addi    r5, r5, -1
        mtctr   r5
        lwz     r28, 4(r4)
        lwz     r29, 8(r4)
        lwz     r30, 12(r4)
        lwzu    r31, 16(r4)

C Per trip: shift the four limbs held in r28-r31 while reloading those
C registers with the next four, then merge and store four result limbs.
C r9-r12 receive the right-shifted parts, r24-r27 the left-shifted parts.
L(loopU):
        srw     r9, r28, r6
        slw     r24, r28, r8
        lwz     r28, 4(r4)
        srw     r10, r29, r6
        slw     r25, r29, r8
        lwz     r29, 8(r4)
        srw     r11, r30, r6
        slw     r26, r30, r8
        lwz     r30, 12(r4)
        srw     r12, r31, r6
        slw     r27, r31, r8
        lwzu    r31, 16(r4)
        or      r24, r0, r24
        stw     r24, 4(r7)
        or      r25, r9, r25
        stw     r25, 8(r7)
        or      r26, r10, r26
        stw     r26, 12(r7)
        or      r27, r11, r27
        stwu    r27, 16(r7)
        mr      r0, r12         C carry low part into the next trip
        bdnz    L(loopU)

C Final group: the same computation with no loads, followed by the most
C significant result limb, which has no higher limb to merge in.
        srw     r9, r28, r6
        slw     r24, r28, r8
        srw     r10, r29, r6
        slw     r25, r29, r8
        srw     r11, r30, r6
        slw     r26, r30, r8
        srw     r12, r31, r6
        slw     r27, r31, r8
        or      r24, r0, r24
        stw     r24, 4(r7)
        or      r25, r9, r25
        stw     r25, 8(r7)
        or      r26, r10, r26
        stw     r26, 12(r7)
        or      r27, r11, r27
        stw     r27, 16(r7)

        stw     r12, 20(r7)     C top result limb
        lmw     r24, 8(r1)      C restore registers
        addi    r1, r1, 48      C release the frame
        blr
EPILOGUE()