lshiftc.asm revision 1.1.1.1
dnl  AMD64 mpn_lshiftc -- mpn left shift with complement.

dnl  Copyright 2003, 2005, 2006, 2009 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C K8,K9:	 2.75
C K10:		 2.75
C P4:		 ?
C P6-15 (Core2): ?
C P6-28 (Atom):	 ?


C SUMMARY (derived from the code in this file)
C   With 64-bit limbs, rsh = 64 - cnt, and limb 0 at the lowest address, the
C   routine stores
C	rp[i] = ~((up[i] << cnt) | (up[i-1] >> rsh))	for i = n-1 down to 1
C	rp[0] = ~(up[0] << cnt)
C   and returns up[n-1] >> rsh in %rax.  The return value is not complemented.
C   Limbs are read and written from the highest index down to index 0.
C
C   Only %cl feeds the shifts.  The code flips between the two counts with a
C   32-bit neg of %rcx; this relies on 64-bit shifts using just the low 6 bits
C   of %cl, so that -cnt acts as 64 - cnt.
C
C   Registers written: %rax, %rcx, %rdx, %r8, %r9, %r10, %r11 and the flags.
C   The stack is not touched.
C
C   NOTE(review): up[n-1] is loaded unconditionally and a cnt of 0 would make
C   both shift counts 0, so callers presumably guarantee n >= 1 and
C   1 <= cnt <= 63 -- confirm against the documented mpn interface.

C INPUT PARAMETERS
define(`rp',	`%rdi')		C destination limb pointer
define(`up',	`%rsi')		C source limb pointer
define(`n',	`%rdx')		C limb count, used as a descending index
define(`cnt',	`%rcx')		C left shift count, consumed through cl

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_lshiftc)
C Compute the return value first: the bits shifted out of the top limb.
	neg	R32(%rcx)		C put rsh count in cl
	mov	-8(up,n,8), %rax	C rax = up[n-1]
	shr	R8(%rcx), %rax		C function return value

C Dispatch on (n+1) mod 4.  The feed-in paths below handle between zero and
C three of the top limbs so that L(rlx) is reached with n = 3 mod 4, or they
C jump straight to L(ast) when only limb 0 remains.
	neg	R32(%rcx)		C put lsh count in cl
	lea	1(n), R32(%r8)
	and	$3, R32(%r8)		C r8 = (n+1) mod 4
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Handle the top limb alone, leaving n = 3 mod 4.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8	C r8 is free again, reuse it as scratch
	shr	R8(%rcx), %r8
	or	%r8, %r10
	not	%r10
	mov	%r10, -8(rp,n,8)
	dec	n
	jmp	L(rll)			C L(rll) flips cl back to the lsh count

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Handle the top limb alone, leaving n = 1 mod 4, then fall into L(1x).
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	not	%r10
	mov	%r10, -8(rp,n,8)
	dec	n
	neg	R32(%rcx)		C put lsh count in cl
L(1x):
C	Here n = 1 mod 4 and cl holds the lsh count.
	cmp	$1, n
	je	L(ast)			C only limb 0 is left
C	Handle the two top limbs, leaving n = 3 mod 4.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	mov	-24(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, -8(rp,n,8)
	mov	%r11, -16(rp,n,8)
	sub	$2, n

L(rll):	neg	R32(%rcx)		C put lsh count in cl
C Here n = 3 mod 4 and cl holds the lsh count.  Start the left shifts of the
C two top remaining limbs; their right-shift halves are merged in later, either
C at the head of L(top) or in L(end).
L(rlx):	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11

	sub	$4, n			C 4
	jb	L(end)			C 2
	ALIGN(16)
C Main loop, four limbs per iteration.  On entry r10 and r11 hold the left
C shifted limbs up[n+3] and up[n+2], and cl holds the lsh count.
L(top):
	C finish stuff from lsh block
	neg	R32(%rcx)		C put rsh count in cl
	mov	16(up,n,8), %r8
	mov	8(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, 24(rp,n,8)
	mov	%r11, 16(rp,n,8)
	C start two new rsh
	mov	0(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shr	R8(%rcx), %r8
	shr	R8(%rcx), %r9

	C finish stuff from rsh block
	neg	R32(%rcx)		C put lsh count in cl
	mov	8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shl	R8(%rcx), %r10
	or	%r10, %r8
	shl	R8(%rcx), %r11
	or	%r11, %r9
	not	%r8
	not	%r9
	mov	%r8, 8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start two new lsh
	mov	-8(up,n,8), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r10
	shl	R8(%rcx), %r11

	sub	$4, n
	jae	L(top)			C 2
C Wind-down.  The subtraction borrowed from n = 3, so n is -1 here and the
C offsets 16 and 24 address limbs 1 and 2.  Complete the two pending limbs.
L(end):
	neg	R32(%rcx)		C put rsh count in cl
	mov	16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	8(up,n,8), %r9
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, 24(rp,n,8)
	mov	%r11, 16(rp,n,8)

	neg	R32(%rcx)		C put lsh count in cl
C Lowest limb: nothing is shifted in from below, so only shl and complement.
L(ast):	mov	(up), %r10
	shl	R8(%rcx), %r10
	not	%r10
	mov	%r10, (rp)
	ret
EPILOGUE()