dnl  aorrlshC_n.asm revision 1.1.1.2
dnl  Intel Atom mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[]

dnl  Contributed to the GNU project by Marco Bodrato.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                           mp_size_t size, mp_limb_t carry);
C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                           mp_size_t size, mp_signed_limb_t carry);

C			    cycles/limb
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom			 6
C AMD K6
C AMD K7
C AMD K8
C AMD K10

C Stack arguments.  PARAM_CORB is read by the M4_function_c entry only.
defframe(PARAM_CORB,	20)
defframe(PARAM_SIZE,	16)
defframe(PARAM_DBLD,	12)
defframe(PARAM_SRC,	 8)
defframe(PARAM_DST,	 4)

dnl  re-use parameter space
dnl  Each argument slot is overwritten only after its argument has been loaded
dnl  into a register, so no extra stack space is needed for the saved
dnl  registers or for the loop counter.
define(VAR_COUNT,`PARAM_SIZE')
define(SAVE_EBP,`PARAM_DBLD')
define(SAVE_VP,`PARAM_SRC')
define(SAVE_UP,`PARAM_DST')

dnl  M = 1 << LSH, used as the lea scale factor that performs the left shift.
dnl  LSH, RSH, M4_inst, M4_opp, M4_function and M4_function_c are not defined
dnl  in this file; presumably the including wrapper supplies them, with
dnl  RSH = 32-LSH (TODO confirm against the wrapper).
define(M, eval(m4_lshift(1,LSH)))
define(`rp',	`%edi')
define(`up',	`%esi')
define(`vp',	`%ebx')

ASM_START()
	TEXT
	ALIGN(8)

C M4_function_c -- entry point taking an extra carry argument.
C
C Splits PARAM_CORB into the two values the shared body expects:
C   edx = bit LSH of the argument, later moved into CF by a 1-bit shr
C   eax = the argument with that bit applied through M4_opp, later added
C         to the scaled first limb of vp by lea
C and then jumps into M4_function at L(start_nc).

PROLOGUE(M4_function_c)
deflit(`FRAME',0)
	movl	PARAM_CORB, %eax	C eax = carry argument
	movl	%eax, %edx
	shr	$LSH, %edx
	andl	$1, %edx		C edx = bit LSH of the carry argument
	M4_opp	%edx, %eax		C apply that bit to eax with M4_opp
	jmp	L(start_nc)		C continue in the shared body
EPILOGUE()

C M4_function -- rp[] = (vp[] << LSH) M4_inst up[], result limb in eax.
C
C Register use in the shared body:
C   rp (edi)   destination pointer, from PARAM_DST
C   up (esi)   operand applied with M4_inst, from PARAM_SRC
C   vp (ebx)   operand scaled by M, from PARAM_DBLD
C   eax, ecx   alternate between the current vp limb and the previous limb
C              shifted right by RSH
C   ebp, edx   result limbs under construction; between the two halves of
C              an iteration bit 0 of edx also holds the saved CF
C   VAR_COUNT  (size+1)/2, counted down once per two-limb iteration
C
C edi, esi, ebx and ebp are saved on entry and restored before ret.
C
C The carry survives the flag-clobbering shr by RSH because adc edx,edx
C copies CF into bit 0 of edx and a later 1-bit shr of edx moves it back
C into CF.  lea and mov leave CF untouched in between.
C
C NOTE(review): size = 0 would take the even path with VAR_COUNT = 0 and
C decrement it past zero; callers presumably guarantee size >= 1.

PROLOGUE(M4_function)
deflit(`FRAME',0)

	xor	%eax, %eax		C nothing to add below the first limb
	xor	%edx, %edx		C incoming carry bit = 0
L(start_nc):
	push	rp			FRAME_pushl()

	mov	PARAM_SIZE, %ecx	C size
	mov	PARAM_DST, rp
	mov	up, SAVE_UP		C PARAM_DST slot is free now
	incl	%ecx			C size + 1
	mov	PARAM_SRC, up
	mov	vp, SAVE_VP		C PARAM_SRC slot is free now
	shr	%ecx			C (size+1)\2
	mov	PARAM_DBLD, vp
	mov	%ebp, SAVE_EBP		C PARAM_DBLD slot is free now
	mov	%ecx, VAR_COUNT		C CF from the shr above is still live
	jnc	L(entry)		C size odd

	shr	%edx			C size even
	mov	(vp), %ecx		C first vp limb
	lea	4(vp), vp
	lea	(%eax,%ecx,M), %edx	C edx = eax + limb*M
	mov	%ecx, %eax		C keep the limb for its high bits
	lea	-4(up), up		C bias up and rp so that the 4(up) and
	lea	-4(rp), rp		C 4(rp) accesses below hit limb 0
	jmp	L(enteven)

	ALIGN(16)
L(oop):
	lea	(%eax,%ecx,M), %ebp	C ebp = eax + limb*M
	shr	$RSH, %ecx		C bits shifted out of this limb
	mov	4(vp), %eax		C next vp limb
	shr	%edx			C bit 0 of edx back into CF
	lea	8(vp), vp
	M4_inst	(up), %ebp
	lea	(%ecx,%eax,M), %edx	C edx = ecx + next limb*M
	mov	%ebp, (rp)
L(enteven):
	M4_inst	4(up), %edx
	lea	8(up), up
	mov	%edx, 4(rp)
	adc	%edx, %edx		C edx = 2*edx + CF, parks CF in bit 0
	shr	$RSH, %eax		C bits shifted out of this limb
	lea	8(rp), rp
L(entry):
	mov	(vp), %ecx		C last limb once VAR_COUNT reaches zero
	decl	VAR_COUNT
	jnz	L(oop)

	lea	(%eax,%ecx,M), %ebp	C final result limb before M4_inst
	shr	$RSH, %ecx		C bits shifted out of the last limb
	shr	%edx			C bit 0 of edx back into CF
	mov	SAVE_VP, vp
	M4_inst	(up), %ebp
	mov	%ecx, %eax
	mov	SAVE_UP, up
	M4_inst	$0, %eax		C result = shifted-out bits with final carry
	mov	%ebp, (rp)
	mov	SAVE_EBP, %ebp
	pop	rp			FRAME_popl()
	ret
EPILOGUE()

ASM_END()