dnl  x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a
dnl  limb and add the result to a second limb vector.
dnl
dnl  The same source also builds __gmpn_submul_1 (subtract instead of add)
dnl  when OPERATION_submul_1 is defined; see the ifdef block below.

dnl  Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002, 2005 Free Software
dnl  Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C			    cycles/limb
C P5:				14.75
C P6 model 0-8,10-12)		 7.5
C P6 model 9  (Banias)
C P6 model 13 (Dothan)		 6.75
C P4 model 0  (Willamette)	24.0
C P4 model 1  (?)		24.0
C P4 model 2  (Northwood)	24.0
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C K6:				12.5
C K7:				 5.25
C K8:


C Pick the accumulate instruction (M4_inst) and the exported function name
C (M4_function_1) from whichever OPERATION_* symbol is defined.  Building
C with neither symbol defined is reported through m4_error.

ifdef(`OPERATION_addmul_1',`
      define(M4_inst,        addl)
      define(M4_function_1,  mpn_addmul_1)

',`ifdef(`OPERATION_submul_1',`
      define(M4_inst,        subl)
      define(M4_function_1,  mpn_submul_1)

',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
')')')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)


C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                          mp_limb_t mult);
C
C For each of the size limbs: multiply src[i] by mult, fold in the carry
C limb from the previous position, and apply M4_inst (addl or subl) of the
C low result to dst[i].  The carry limb left after the last position is
C returned in %eax.
C
C Syntax:  AT&T (source operand first), 32-bit x86.
C Args:    all four are read from the stack through the PARAM_* macros
C          below; FRAME is the number of bytes pushed since entry.
C Saved:   %edi, %esi, %ebx, %ebp are pushed on entry and popped on exit.
C Scratch: %eax, %ecx, %edx.
C
C Register roles:
C   %esi       src pointer
C   %edi       dst pointer
C   %ecx       loop counter
C   %edx:%eax  64-bit product written by mull
C   %ebx       carry limb (cylimb); alternates with %ebp in the unrolled loop
C
C Flag discipline: registers inside a carry chain are cleared with
C movl $0 rather than xorl, because movl leaves CF alone and the next adcl
C consumes that CF.

define(PARAM_MULTIPLIER, `FRAME+16(%esp)')
define(PARAM_SIZE,       `FRAME+12(%esp)')
define(PARAM_SRC,        `FRAME+8(%esp)')
define(PARAM_DST,        `FRAME+4(%esp)')

	TEXT
	ALIGN(8)

PROLOGUE(M4_function_1)
deflit(`FRAME',0)

	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp
deflit(`FRAME',16)

	movl	PARAM_DST,%edi
	movl	PARAM_SRC,%esi
	movl	PARAM_SIZE,%ecx

	xorl	%ebx,%ebx		C cylimb = 0
	andl	$3,%ecx			C ecx = size mod 4
	jz	L(end0)			C no leftover limbs, go to the 4-way loop

C Handle the size mod 4 leftover limbs one at a time, so that the unrolled
C loop below only ever sees a multiple of four.
L(oop0):
	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER	C edx:eax = src limb * mult
	leal	4(%esi),%esi
	addl	%ebx,%eax		C lo += incoming cylimb
	movl	$0,%ebx			C clear ebx, CF from the addl is kept
	adcl	%ebx,%edx		C hi += carry out of lo
	M4_inst	%eax,(%edi)		C dst limb +/-= lo
	adcl	%edx,%ebx	C propagate carry into cylimb

	leal	4(%edi),%edi
	decl	%ecx
	jnz	L(oop0)

L(end0):
	movl	PARAM_SIZE,%ecx
	shrl	$2,%ecx			C ecx = number of 4-limb groups
	jz	L(end)

C Main loop, four limbs per iteration.  %ebx and %ebp take turns holding
C the value about to be applied to dst and the carry limb being built for
C the next position.  Each mull is placed before the M4_inst of the
C previous limb, so the carry/borrow produced by M4_inst is still in CF
C when the following adcl runs.
	ALIGN(8)
L(oop):	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER
	addl	%eax,%ebx		C ebx = lo0 + incoming cylimb
	movl	$0,%ebp
	adcl	%edx,%ebp		C ebp = hi0 + carry

	movl	4(%esi),%eax
	mull	PARAM_MULTIPLIER
	M4_inst	%ebx,(%edi)		C dst[0] +/-= ebx
	adcl	%eax,%ebp	C new lo + cylimb
	movl	$0,%ebx
	adcl	%edx,%ebx		C ebx = hi1 + carry

	movl	8(%esi),%eax
	mull	PARAM_MULTIPLIER
	M4_inst	%ebp,4(%edi)		C dst[1] +/-= ebp
	adcl	%eax,%ebx	C new lo + cylimb
	movl	$0,%ebp
	adcl	%edx,%ebp		C ebp = hi2 + carry

	movl	12(%esi),%eax
	mull	PARAM_MULTIPLIER
	M4_inst	%ebx,8(%edi)		C dst[2] +/-= ebx
	adcl	%eax,%ebp	C new lo + cylimb
	movl	$0,%ebx
	adcl	%edx,%ebx		C ebx = hi3 + carry

	M4_inst	%ebp,12(%edi)		C dst[3] +/-= ebp
	adcl	$0,%ebx		C propagate carry into cylimb

	leal	16(%esi),%esi
	leal	16(%edi),%edi
	decl	%ecx
	jnz	L(oop)

L(end):	movl	%ebx,%eax		C return the final cylimb

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

EPILOGUE()