dnl  add_n.asm revision 1.1.1.1
dnl  Intel Pentium-4 mpn_add_n -- mpn addition.

dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
C			    6.0 cycles/limb if dst==src1 or dst==src2
C P4 Prescott:		    >= 5 cycles/limb

C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                      mp_size_t size);
C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                       mp_size_t size, mp_limb_t carry);
C
C The 4 c/l achieved here isn't particularly good, but is better than 9 c/l
C for a basic adc loop.
C
C Both entry points add the size-limb operands at src1 and src2 limb by
C limb, lowest limb first, store the size result limbs at dst and return
C the carry out of the last limb in eax.  mpn_add_nc also adds the carry
C parameter into the first limb; it is presumably expected to be 0 or 1
C (the loop comment below calls mm0 a carry bit) -- TODO confirm with
C callers.
C
C Method: each 32-bit limb is loaded with movd, which zero-extends it into
C a 64-bit MMX register, so a 64-bit paddq produces limb + limb + carry in
C full without touching the flags.  The low 32 bits are stored as the
C result limb and a 32-bit right shift leaves the carry in mm0 for the
C next limb.
C
C Notes:
C   - The loop is bottom-tested and there is no check for size==0, so size
C     must be at least 1.
C   - ebx is saved in the stack slot of the src1 parameter, so src1 has to
C     be loaded before ebx is stored there.
C   - paddq on MMX registers is an SSE2 instruction.
C   - emms is executed before returning, so the MMX state is cleared on
C     exit.

defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)

dnl  re-use parameter space
define(SAVE_EBX,`PARAM_SRC1')

	TEXT
	ALIGN(8)

PROLOGUE(mpn_add_nc)
deflit(`FRAME',0)

	movd	PARAM_CARRY, %mm0	C mm0 = carry-in
	jmp	L(start_nc)		C join the common code in mpn_add_n

EPILOGUE()

	ALIGN(8)
PROLOGUE(mpn_add_n)
deflit(`FRAME',0)

	pxor	%mm0, %mm0		C mm0 = 0, no carry-in

L(start_nc):
	movl	PARAM_SRC1, %eax	C must be read before the slot is reused
	movl	%ebx, SAVE_EBX		C overwrites the src1 parameter slot
	movl	PARAM_SRC2, %ebx
	movl	PARAM_DST, %edx
	movl	PARAM_SIZE, %ecx

	C Point each register one past its last limb and index with a
	C negative counter, so a single add both advances the index and
	C sets ZF for the loop test.
	leal	(%eax,%ecx,4), %eax	C src1 end
	leal	(%ebx,%ecx,4), %ebx	C src2 end
	leal	(%edx,%ecx,4), %edx	C dst end
	negl	%ecx			C -size

L(top):
	C eax	src1 end
	C ebx	src2 end
	C ecx	counter, limbs, negative
	C edx	dst end
	C mm0	carry bit

	movd	(%eax,%ecx,4), %mm1	C src1 limb, zero-extended to 64 bits
	movd	(%ebx,%ecx,4), %mm2	C src2 limb, zero-extended to 64 bits
	paddq	%mm2, %mm1		C 64-bit sum of the two limbs

	paddq	%mm1, %mm0		C add the carry from the previous limb
	movd	%mm0, (%edx,%ecx,4)	C store the low 32 bits as the result limb

	psrlq	$32, %mm0		C keep only the carry for the next limb

	addl	$1, %ecx
	jnz	L(top)			C until the counter reaches zero


	movd	%mm0, %eax		C return value: carry out
	movl	SAVE_EBX, %ebx		C restore the callee-saved register
	emms
	ret

EPILOGUE()