dnl  x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a
dnl  limb and add the result to a second limb vector.

dnl  Copyright 1992, 1994, 1997, 1999-2002, 2005 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                               cycles/limb
C P5                            14.75
C P6 model 0-8,10-12             7.5
C P6 model 9  (Banias)           6.7
C P6 model 13 (Dothan)           6.75
C P4 model 0  (Willamette)      24.0
C P4 model 1  (?)               24.0
C P4 model 2  (Northwood)       24.0
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom
C AMD K6                        12.5
C AMD K7                         5.25
C AMD K8
C AMD K10


C The same source builds two functions.  Exactly one of OPERATION_addmul_1
C or OPERATION_submul_1 must be defined by the build:
C   M4_inst        the instruction that applies a product limb to memory,
C                  addl for addmul_1 and subl for submul_1
C   M4_function_1  the name of the function being generated

ifdef(`OPERATION_addmul_1',`
      define(M4_inst, addl)
      define(M4_function_1, mpn_addmul_1)

',`ifdef(`OPERATION_submul_1',`
      define(M4_inst, subl)
      define(M4_function_1, mpn_submul_1)

',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
')')')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)


C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                          mp_limb_t mult);
C
C For each of the size limbs, multiply src[i] by mult and add the product
C to, or subtract it from, dst[i], depending on M4_inst.  The carry limb
C that is left over after the last limb is returned in eax.
C
C Arguments are read from the stack through the PARAM_ macros below.  FRAME
C is the number of bytes pushed since entry, so the macros stay valid after
C the register saves.
C
C Register usage:
C   esi       src pointer, advanced as limbs are consumed
C   edi       dst pointer, advanced in step with esi
C   ecx       loop counter
C   ebx       carry limb passed from one limb to the next, and the result
C   ebp       second accumulator used by the unrolled loop
C   eax, edx  low and high halves of each mull product
C
C edi, esi, ebx and ebp are saved on entry and restored before returning.
C eax, ecx and edx are overwritten.

define(PARAM_MULTIPLIER, `FRAME+16(%esp)')
define(PARAM_SIZE,       `FRAME+12(%esp)')
define(PARAM_SRC,        `FRAME+8(%esp)')
define(PARAM_DST,        `FRAME+4(%esp)')

        TEXT
        ALIGN(8)

PROLOGUE(M4_function_1)
deflit(`FRAME',0)

        pushl   %edi
        pushl   %esi
        pushl   %ebx
        pushl   %ebp
deflit(`FRAME',16)

        movl    PARAM_DST,%edi
        movl    PARAM_SRC,%esi
        movl    PARAM_SIZE,%ecx

        xorl    %ebx,%ebx               C carry limb starts at zero
        andl    $3,%ecx                 C ecx = size mod 4
        jz      L(end0)                 C no leftover limbs, go to unrolled loop

C Leftover loop: handles size mod 4 limbs, one limb per iteration.
L(oop0):
        movl    (%esi),%eax
        mull    PARAM_MULTIPLIER        C edx:eax = src limb * mult
        leal    4(%esi),%esi            C advance src
        addl    %ebx,%eax               C low product + incoming carry limb
        movl    $0,%ebx                 C movl leaves the carry flag intact
        adcl    %ebx,%edx               C fold that carry into the high product
        M4_inst %eax,(%edi)             C apply low limb to dst
        adcl    %edx,%ebx       C propagate carry into cylimb

        leal    4(%edi),%edi            C advance dst
        decl    %ecx
        jnz     L(oop0)

L(end0):
        movl    PARAM_SIZE,%ecx
        shrl    $2,%ecx                 C ecx = number of four-limb groups
        jz      L(end)

C Unrolled loop: four limbs per iteration.  ebx and ebp alternate as the
C accumulator.  The memory update for one limb is issued after the mull for
C the next limb, and the carry or borrow flag it produces is consumed by the
C adcl that follows it.  Each movl $0 sits between a flag-setting instruction
C and the adcl that reads the flag, and movl does not alter the flags.
        ALIGN(8)
L(oop): movl    (%esi),%eax
        mull    PARAM_MULTIPLIER
        addl    %eax,%ebx               C low product 0 + incoming carry limb
        movl    $0,%ebp
        adcl    %edx,%ebp               C ebp = high product 0 + carry

        movl    4(%esi),%eax
        mull    PARAM_MULTIPLIER
        M4_inst %ebx,(%edi)
        adcl    %eax,%ebp       C new lo + cylimb
        movl    $0,%ebx
        adcl    %edx,%ebx

        movl    8(%esi),%eax
        mull    PARAM_MULTIPLIER
        M4_inst %ebp,4(%edi)
        adcl    %eax,%ebx       C new lo + cylimb
        movl    $0,%ebp
        adcl    %edx,%ebp

        movl    12(%esi),%eax
        mull    PARAM_MULTIPLIER
        M4_inst %ebx,8(%edi)
        adcl    %eax,%ebp       C new lo + cylimb
        movl    $0,%ebx
        adcl    %edx,%ebx

        M4_inst %ebp,12(%edi)
        adcl    $0,%ebx         C propagate carry into cylimb

        leal    16(%esi),%esi           C advance src by four limbs
        leal    16(%edi),%edi           C advance dst by four limbs
        decl    %ecx
        jnz     L(oop)

L(end): movl    %ebx,%eax               C return the final carry limb

        popl    %ebp
        popl    %ebx
        popl    %esi
        popl    %edi
        ret

EPILOGUE()