dnl  x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
dnl  with a limb and store the result in a second limb vector.

dnl  Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free
dnl  Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C			    cycles/limb
C P5:				12.5
C P6 model 0-8,10-12		 5.5
C P6 model 9  (Banias)
C P6 model 13 (Dothan)		 5.25
C P4 model 0  (Willamette)	19.0
C P4 model 1  (?)		19.0
C P4 model 2  (Northwood)	19.0
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C K6:				10.5
C K7:				 4.5
C K8:


C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                      mp_limb_t multiplier);
C
C Multiply each limb read from src by multiplier, add the carry limb from
C the previous step, store the low 32 bits at dst and keep the high 32 bits
C as the next carry.  The last carry limb is returned in eax.
C
C Register roles:
C   edi        dst pointer, advanced as limbs are stored
C   esi        src pointer, advanced as limbs are loaded
C   ecx        loop counter
C   ebx        carry limb at the top of every loop iteration
C   ebp        second accumulator, used only inside the 4-limb loop
C   eax, edx   low and high halves of each mull product
C
C The first size mod 4 limbs are handled one per iteration, then size/4
C iterations handle four limbs each.  With size equal to zero both loops
C are skipped and the function returns 0.
C
C Carries are cleared with movl $0 and not with xorl: the carry flag set by
C the preceding addl has to reach the following adcl, and movl leaves the
C flags untouched.

defframe(PARAM_MULTIPLIER,16)
defframe(PARAM_SIZE,      12)
defframe(PARAM_SRC,       8)
defframe(PARAM_DST,       4)

	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_1)
deflit(`FRAME',0)

	C Save the four registers used as working storage below; they are
	C restored in reverse order before ret.
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp
C FRAME now records the 16 bytes pushed above.  NOTE(review): presumably
C the PARAM_* macros use it to adjust their stack offsets; defframe is
C defined outside this file, confirm there.
deflit(`FRAME',16)

	movl	PARAM_DST,%edi
	movl	PARAM_SRC,%esi
	movl	PARAM_SIZE,%ecx

	xorl	%ebx,%ebx		C carry limb = 0
	andl	$3,%ecx			C ecx = size mod 4
	jz	L(single_end)		C nothing to do one limb at a time

C One limb per iteration, ecx = limbs left in this phase.
L(single_top):
	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER	C edx:eax = src limb * multiplier
	leal	4(%esi),%esi		C advance src
	addl	%ebx,%eax		C low half + incoming carry limb
	movl	$0,%ebx			C zero without disturbing CF
	adcl	%ebx,%edx		C high half + CF
	movl	%eax,(%edi)		C store result limb
	movl	%edx,%ebx		C high half becomes the next carry limb

	leal	4(%edi),%edi		C advance dst
	decl	%ecx
	jnz	L(single_top)

L(single_end):
	movl	PARAM_SIZE,%ecx
	shrl	$2,%ecx			C ecx = number of 4-limb groups
	jz	L(done)


C Four limbs per iteration.  ebx and ebp alternate: one holds the limb
C being completed while the other collects the carry for the next limb.
C Each finished limb is stored after the mull of the following limb.
	ALIGN(8)
L(quad_top):
	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER
	addl	%eax,%ebx		C limb 0 = low half + carry limb
	movl	$0,%ebp
	adcl	%edx,%ebp		C ebp = high half + CF

	movl	4(%esi),%eax
	mull	PARAM_MULTIPLIER
	movl	%ebx,(%edi)		C store limb 0
	addl	%eax,%ebp		C limb 1 = low half + carry limb
	movl	$0,%ebx
	adcl	%edx,%ebx		C ebx = high half + CF

	movl	8(%esi),%eax
	mull	PARAM_MULTIPLIER
	movl	%ebp,4(%edi)		C store limb 1
	addl	%eax,%ebx		C limb 2 = low half + carry limb
	movl	$0,%ebp
	adcl	%edx,%ebp		C ebp = high half + CF

	movl	12(%esi),%eax
	mull	PARAM_MULTIPLIER
	movl	%ebx,8(%edi)		C store limb 2
	addl	%eax,%ebp		C limb 3 = low half + carry limb
	movl	$0,%ebx
	adcl	%edx,%ebx		C ebx = carry limb for the next group

	movl	%ebp,12(%edi)		C store limb 3

	leal	16(%esi),%esi		C advance src by four limbs
	leal	16(%edi),%edi		C advance dst by four limbs
	decl	%ecx
	jnz	L(quad_top)

L(done):
	movl	%ebx,%eax		C return the final carry limb

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

EPILOGUE()