dnl  Alpha mpn_rshift -- Shift a number right.

dnl  Copyright 1994, 1995, 2000, 2009 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     3.25
C EV6:     1.75

C  INPUT PARAMETERS
C  rp	r16
C  up	r17
C  n	r18
C  cnt	r19

C  RESULT
C  r0	up[0] shifted left by (64 - cnt): the bits that fall off the low end,
C	left-justified in the returned limb.
C
C  OPERATION
C  For i = 0 .. n-2:  rp[i] = (up[i] >> cnt) | (up[i+1] << (64 - cnt))
C  and finally:       rp[n-1] = up[n-1] >> cnt
C  Limbs are read and written from low addresses to high.
C
C  The code loads up[0] unconditionally, so n >= 1 is assumed.
C  NOTE(review): the GMP manual restricts cnt to 1 .. 63; this routine does
C  not check it -- confirm against callers.
C
C  REGISTER ROLES
C  r16		destination pointer, advanced as limbs are stored
C  r17		source pointer, advanced as limbs are consumed
C  r18		n-1 on entry to the code, then the number of limbs still to
C		be handled by the unrolled loop (kept a multiple of 4)
C  r19		right shift count (cnt)
C  r20		-cnt, used as the left shift count; this relies on the shift
C		instructions using only the low 6 bits of the count, which
C		makes it act as 64 - cnt
C  r28		iteration count of the one-limb-at-a-time feed-in loop
C  r1-r4	source limbs in flight
C  r5-r8	left-shifted halves, then the combined result limbs
C  r21-r24	right-shifted halves; r24 carries over between groups
C  r26		return address
C
C  STRUCTURE
C  1. top0:  handles (n-1) mod 4 limbs, one per iteration.
C  2. end:   taken when nothing but the top limb is left.
C  3. top:   4-way unrolled, software pipelined loop.  It is entered through
C            a two stage warm up and left through one of two cool down
C            tails: end1 when exactly 4 limbs were pending, end2 otherwise.
C  No stack is used and no other routine is called.


ASM_START()
PROLOGUE(mpn_rshift)
	ldq	r4,0(r17)	C load first limb
	subq	r31,r19,r20	C r20 = -cnt, the left shift count
	subq	r18,1,r18	C r18 = n-1, limbs above the first one
	and	r18,4-1,r28	C number of limbs in first loop
	sll	r4,r20,r0	C compute function result

	beq	r28,L(L0)	C (n-1) already a multiple of 4: skip feed-in
	subq	r18,r28,r18	C leave a multiple of 4 for the unrolled code

C feed-in loop: one result limb per iteration
C r4 holds the current source limb, r3 receives the next one
	ALIGN(8)
L(top0):
	ldq	r3,8(r17)	C next source limb
	addq	r16,8,r16
	srl	r4,r19,r5	C low part from the current limb
	addq	r17,8,r17
	subq	r28,1,r28
	sll	r3,r20,r6	C high part from the next limb
	bis	r3,r3,r4	C next limb becomes the current limb
	bis	r5,r6,r8
	stq	r8,-8(r16)	C rp was already advanced, hence the -8
	bne	r28,L(top0)

L(L0):	srl	r4,r19,r24	C low part of the next result limb
	beq	r18,L(end)	C only the top limb is left
C warm up phase 1
	ldq	r1,8(r17)
	subq	r18,4,r18
	ldq	r2,16(r17)
	ldq	r3,24(r17)
	ldq	r4,32(r17)
C warm up phase 2
	sll	r1,r20,r7
	srl	r1,r19,r21
	sll	r2,r20,r8
	beq	r18,L(end1)	C exactly 4 limbs were pending
	ldq	r1,40(r17)
	srl	r2,r19,r22
	ldq	r2,48(r17)
	sll	r3,r20,r5
	bis	r7,r24,r7
	srl	r3,r19,r23
	bis	r8,r21,r8
	sll	r4,r20,r6
	ldq	r3,56(r17)
	srl	r4,r19,r24
	ldq	r4,64(r17)
	subq	r18,4,r18
	beq	r18,L(end2)	C exactly 8 limbs were pending
	ALIGN(16)
C main loop
C Each pass stores 4 finished limbs, combines the 4 limbs loaded on the
C previous pass and loads 4 more.  Instructions are kept in groups of four.
L(top):	stq	r7,0(r16)
	bis	r5,r22,r5
	stq	r8,8(r16)
	bis	r6,r23,r6

	sll	r1,r20,r7
	subq	r18,4,r18
	srl	r1,r19,r21
	unop	C ldq	r31,-96(r17)

	sll	r2,r20,r8
	ldq	r1,72(r17)
	srl	r2,r19,r22
	ldq	r2,80(r17)

	stq	r5,16(r16)
	bis	r7,r24,r7
	stq	r6,24(r16)
	bis	r8,r21,r8

	sll	r3,r20,r5
	unop	C ldq	r31,-96(r17)
	srl	r3,r19,r23
	addq	r16,32,r16

	sll	r4,r20,r6
	ldq	r3,88(r17)
	srl	r4,r19,r24
	ldq	r4,96(r17)

	addq	r17,32,r17
	bne	r18,L(top)
C cool down phase 2/1
C 8 limbs are still in flight: 4 combined, 4 loaded but not yet shifted
L(end2):
	stq	r7,0(r16)
	bis	r5,r22,r5
	stq	r8,8(r16)
	bis	r6,r23,r6
	sll	r1,r20,r7
	srl	r1,r19,r21
	sll	r2,r20,r8
	srl	r2,r19,r22
	stq	r5,16(r16)
	bis	r7,r24,r7
	stq	r6,24(r16)
	bis	r8,r21,r8
	sll	r3,r20,r5
	srl	r3,r19,r23
	sll	r4,r20,r6
	srl	r4,r19,r24
C cool down phase 2/2
	stq	r7,32(r16)
	bis	r5,r22,r5
	stq	r8,40(r16)
	bis	r6,r23,r6
	stq	r5,48(r16)
	stq	r6,56(r16)
C cool down phase 2/3
	stq	r24,64(r16)	C top limb: right-shifted part only
	ret	r31,(r26),1

C cool down phase 1/1
C only the 4 limbs loaded in warm up phase 1 are in flight
L(end1):
	srl	r2,r19,r22
	sll	r3,r20,r5
	bis	r7,r24,r7
	srl	r3,r19,r23
	bis	r8,r21,r8
	sll	r4,r20,r6
	srl	r4,r19,r24
C cool down phase 1/2
	stq	r7,0(r16)
	bis	r5,r22,r5
	stq	r8,8(r16)
	bis	r6,r23,r6
	stq	r5,16(r16)
	stq	r6,24(r16)
	stq	r24,32(r16)	C top limb: right-shifted part only
	ret	r31,(r26),1

L(end):	stq	r24,0(r16)	C top limb: right-shifted part only
	ret	r31,(r26),1
EPILOGUE(mpn_rshift)
ASM_END()