aorrlshC_n.asm revision 1.1.1.1
dnl  Intel Atom mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[]

dnl  Contributed to the GNU project by Marco Bodrato.

dnl  Copyright 2011 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                           mp_size_t size, mp_limb_t carry);
C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                           mp_size_t size, mp_signed_limb_t carry);

C				cycles/limb
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom			 6
C AMD K6
C AMD K7
C AMD K8
C AMD K10

C Incoming stack arguments.  The offsets 4..20 line up, in order, with the
C (dst, src1, src2, size, carry) parameters of the prototypes for these
C entry points.  NOTE(review): defframe is defined outside this file;
C presumably each value is a byte offset from the stack pointer at entry.
defframe(PARAM_CORB,	20)
defframe(PARAM_SIZE,	16)
defframe(PARAM_DBLD,	12)
defframe(PARAM_SRC,	 8)
defframe(PARAM_DST,	 4)

dnl  re-use parameter space
C Each argument slot is overwritten only after the code below has read the
C argument out of it: the size slot becomes the loop counter and the three
C pointer slots hold the caller values of ebp, ebx (vp) and esi (up).
define(VAR_COUNT,`PARAM_SIZE')
define(SAVE_EBP,`PARAM_DBLD')
define(SAVE_VP,`PARAM_SRC')
define(SAVE_UP,`PARAM_DST')

C M is used as the scale factor of the lea instructions below.
C NOTE(review): LSH, RSH, M4_inst, M4_opp, M4_function and M4_function_c are
C not defined in this file and must be supplied by whatever includes it.
C Presumably M = 1 << LSH, RSH = 32 - LSH, M4_inst is an add-with-carry or
C subtract-with-borrow instruction and M4_opp is the matching plain
C subtract or add -- confirm against the including files.
define(M, eval(m4_lshift(1,LSH)))
define(`rp',  `%edi')
define(`up',  `%esi')
define(`vp',  `%ebx')

ASM_START()
	TEXT
	ALIGN(8)

C Entry point taking the extra carry argument (PARAM_CORB).
C It prepares two registers and then joins the common body at L(start_nc):
C   edx = (carry >> LSH) & 1	the bit later shifted into the carry flag
C   eax = carry M4_opp edx	the value merged into the first result limb
PROLOGUE(M4_function_c)
deflit(`FRAME',0)
	movl	PARAM_CORB, %eax
	movl	%eax, %edx
	shr	$LSH, %edx
	andl	$1, %edx
	M4_opp	%edx, %eax
	jmp	L(start_nc)
EPILOGUE()

C Entry point without a carry argument: starts with eax = 0 and edx = 0 and
C falls through into the common body.
C
C Register roles in the common body:
C   rp (edi)  destination pointer	up (esi)  src1 pointer
C   vp (ebx)  src2 pointer, the operand whose limbs are scaled by M
C   eax, ecx  alternately the current vp limb and the bits shifted out of
C             the previous one
C   ebp, edx  result limbs being assembled; between limb pairs bit 0 of edx
C             holds the saved carry flag
C edi, esi, ebx and ebp are saved on entry and restored before ret; the
C function result is left in eax.
PROLOGUE(M4_function)
deflit(`FRAME',0)

	xor	%eax, %eax
	xor	%edx, %edx
L(start_nc):
	push	rp			FRAME_pushl()

	mov	PARAM_SIZE, %ecx	C size
	mov	PARAM_DST, rp
	mov	up, SAVE_UP
	incl	%ecx			C size + 1
	mov	PARAM_SRC, up
	mov	vp, SAVE_VP
	shr	%ecx			C (size+1)\2
	mov	PARAM_DBLD, vp
	mov	%ebp, SAVE_EBP
	mov	%ecx, VAR_COUNT
C The carry flag tested here still comes from the shr of ecx above: the mov
C instructions in between do not modify flags.
	jnc	L(entry)		C size odd

C Even size: peel the first limb so the loop can be entered at its second
C half.  up and rp are biased by -4 because L(enteven) addresses 4(up) and
C 4(rp).  The shr moves bit 0 of edx into the carry flag, and only
C flag-preserving lea/mov/jmp follow before the first M4_inst.
	shr	%edx			C size even
	mov	(vp), %ecx
	lea	4(vp), vp
	lea	(%eax,%ecx,M), %edx
	mov	%ecx, %eax
	lea	-4(up), up
	lea	-4(rp), rp
	jmp	L(enteven)

C Main loop, two limbs per iteration.  Each result limb is built with lea as
C (bits shifted out of the previous vp limb) + (current vp limb) * M and is
C then combined with the matching up limb by M4_inst.
	ALIGN(16)
L(oop):
	lea	(%eax,%ecx,M), %ebp
	shr	$RSH, %ecx
	mov	4(vp), %eax
	shr	%edx			C saved carry bit back into the carry flag
	lea	8(vp), vp
	M4_inst	(up), %ebp
	lea	(%ecx,%eax,M), %edx
	mov	%ebp, (rp)
L(enteven):
	M4_inst	4(up), %edx
	lea	8(up), up
	mov	%edx, 4(rp)
	adc	%edx, %edx		C park the carry flag in bit 0 of edx
	shr	$RSH, %eax		C this shift clobbers the flags
	lea	8(rp), rp
L(entry):
	mov	(vp), %ecx
	decl	VAR_COUNT
	jnz	L(oop)

C Tail: process the last limb, restore the saved registers and form the
C return value in eax from the bits shifted out of the top limb, adjusted
C by M4_inst with a zero operand (presumably folding in the final carry).
C The stores and restores are interleaved with the arithmetic; only mov
C instructions sit between the two M4_inst, so the carry flag survives.
	lea	(%eax,%ecx,M), %ebp
	shr	$RSH, %ecx
	shr	%edx
	mov	SAVE_VP, vp
	M4_inst	(up), %ebp
	mov	%ecx, %eax
	mov	SAVE_UP, up
	M4_inst	$0, %eax
	mov	%ebp, (rp)
	mov	SAVE_EBP, %ebp
	pop	rp			FRAME_popl()
	ret
EPILOGUE()

ASM_END()