dnl  AMD64 mpn_rshift -- mpn right shift.

dnl  Copyright 2003, 2005, 2009 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C K8,K9:	 2.375
C K10:		 2.375
C P4:		 8
C P6-15 (Core2): 2.11
C P6-28 (Atom):	 5.75


C mpn_rshift
C
C Shifts the n-limb operand at up right by cnt bits and stores the n-limb
C result at rp.  Limbs are 64 bits wide and limb 0, at the lowest address,
C is the least significant.  For every limb i below the top one
C	rp[i] = (up[i] >> cnt) | (up[i+1] << (64-cnt))
C and the top limb is just up[n-1] >> cnt, so zeros are shifted in.
C
C Return value, in rax: up[0] << (64-cnt), that is the cnt bits shifted out
C of the low end, placed in the most significant bits of the limb.
C
C The two shift counts are produced by negating ecx back and forth.  The
C hardware uses only the low 6 bits of cl for 64-bit shifts, so -cnt acts
C as 64-cnt.
C
C Assumptions, not checked by the code -- TODO confirm against the callers:
C   n >= 1	n = 0 would be dispatched like a multiple of 4 and would
C		read and write outside the operands
C   1 <= cnt <= 63
C
C Registers written: rax, rcx, rdx, rsi, rdi, r8, r9, r10, r11 and the
C flags.  No stack is used.  The argument registers below follow the System
C V AMD64 argument order; presumably that is the only ABI targeted by this
C file, verify before using it elsewhere.

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_rshift)
	neg	R32(%rcx)		C cl = -cnt, acts as 64-cnt for shl
	mov	(up), %rax
	shl	R8(%rcx), %rax		C function return value
	neg	R32(%rcx)		C cl = cnt again, the count for shr

	lea	1(n), R32(%r8)		C r8d = n+1, reduced mod 4 below

C Point up and rp at their last limb and negate n.  From here on the
C operand 8(up,n,8) addresses the lowest limb not yet processed, and n
C counts up towards zero.
	lea	-8(up,n,8), up
	lea	-8(rp,n,8), rp
	neg	n

C Dispatch on (n+1) mod 4.  The cases below peel off 0, 1 or 2 limbs or
C 3 limbs in the last case so that the number of limbs left when L(rlx)
C is reached is 3 mod 4, which is what the unrolled loop and its tail
C expect.
	and	$3, R32(%r8)
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Peel off one limb.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10		C low part: up[i] >> cnt
	neg	R32(%rcx)		C cl = 64-cnt, the count for shl
	mov	16(up,n,8), %r8
	shl	R8(%rcx), %r8		C high part: up[i+1] << (64-cnt)
	or	%r8, %r10
	mov	%r10, 8(rp,n,8)
	inc	n
	jmp	L(rll)			C L(rll) flips cl back to cnt

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Peel off one limb, then fall into the n = 1, 5, 9, ... case.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	neg	R32(%rcx)		C cl = 64-cnt, the count for shl
	mov	16(up,n,8), %r8
	shl	R8(%rcx), %r8
	or	%r8, %r10
	mov	%r10, 8(rp,n,8)
	inc	n
	neg	R32(%rcx)		C cl = cnt, the count for shr
L(1x):
	cmp	$-1, n			C exactly one limb left?
	je	L(ast)			C then only the top limb remains
C	Peel off two limbs.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r11
	neg	R32(%rcx)		C cl = 64-cnt, the count for shl
	mov	16(up,n,8), %r8
	mov	24(up,n,8), %r9
	shl	R8(%rcx), %r8
	or	%r8, %r10
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, 8(rp,n,8)
	mov	%r11, 16(rp,n,8)
	add	$2, n

L(rll):	neg	R32(%rcx)		C cl = cnt, the count for shr
C Start the first two limbs: r10 and r11 receive their shr halves here,
C the matching shl halves are ORed in at L(top) or at L(end).
L(rlx):	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r11

	add	$4, n			C			      4
	jb	L(end)			C			      2
C	Carry out of the add means n went from negative to non-negative,
C	i.e. only the three tail limbs are left.
	ALIGN(16)
C Main loop, four limbs per iteration.  On entry r10 and r11 hold the shr
C halves of two limbs started earlier; on exit they hold the shr halves of
C the next two.
L(top):
	C complete the two limbs started with shr: OR in their shl halves
	neg	R32(%rcx)		C cl = 64-cnt, the count for shl
	mov	-16(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shl	R8(%rcx), %r8
	or	%r8, %r10
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, -24(rp,n,8)
	mov	%r11, -16(rp,n,8)
	C start two new limbs with their shl halves, cl is still 64-cnt
	mov	(up,n,8), %r8
	mov	8(up,n,8), %r9
	shl	R8(%rcx), %r8
	shl	R8(%rcx), %r9

	C complete the two limbs started with shl: OR in their shr halves
	neg	R32(%rcx)		C cl = cnt, the count for shr
	mov	-8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shr	R8(%rcx), %r10
	or	%r10, %r8
	shr	R8(%rcx), %r11
	or	%r11, %r9
	mov	%r8, -8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start two new limbs with their shr halves, cl is still cnt
	mov	8(up,n,8), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r10
	shr	R8(%rcx), %r11

	add	$4, n
	jae	L(top)			C			      2
C Tail: r10 and r11 hold the shr halves of limbs n-3 and n-2.  Complete
C them with the shl halves of limbs n-2 and n-1; up and rp point at limb
C n-1.
L(end):
	neg	R32(%rcx)		C cl = 64-cnt, the count for shl
	mov	-8(up), %r8
	shl	R8(%rcx), %r8
	or	%r8, %r10
	mov	(up), %r9
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, -16(rp)
	mov	%r11, -8(rp)

	neg	R32(%rcx)		C cl = cnt, the count for shr
C The top limb has no higher neighbour, so it is only shifted right.
L(ast):	mov	(up), %r10
	shr	R8(%rcx), %r10
	mov	%r10, (rp)
	ret
EPILOGUE()