dnl  Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.

dnl  Copyright 1996, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 14.2 cycles/crossproduct (approx)


C void mpn_mul_basecase (mp_ptr wp,
C                        mp_srcptr xp, mp_size_t xsize,
C                        mp_srcptr yp, mp_size_t ysize);
C
C Schoolbook multiplication: stores the xsize+ysize limb product of
C {xp,xsize} and {yp,ysize} at wp, using 32-bit limbs.
C
C The first row (all of xp times yp[0]) is stored straight into wp.  Each
C further y limb is then multiplied by all of xp and added into wp one limb
C higher, and the carry-out of that row becomes a new top limb.
C
C The xsize = 1 shortcut stores only wp[0] and wp[1], so it assumes
C ysize = 1 as well, i.e. that callers pass xsize >= ysize >= 1.
C TODO confirm against the callers.
C
C Register use:
C   esi       xp on entry, then xp + xsize (one past the last x limb)
C   edi       wp on entry, then the slot that receives the top limb of the
C             current row; lower limbs are reached through the negative
C             index in ecx
C   ebp       address of the y limb used by the current row
C   ecx       negative limb index, counted up to zero
C   ebx       carry limb handed from one x limb to the next
C   eax,edx   low and high halves of each mull result
C
C ebx, esi, edi and ebp are saved and restored; eax, ecx and edx are
C overwritten.

defframe(PARAM_YSIZE, 20)
defframe(PARAM_YP, 16)
defframe(PARAM_XSIZE, 12)
defframe(PARAM_XP, 8)
defframe(PARAM_WP, 4)

C Count of rows still to be done by the outer loop.
C NOTE(review): offset -4 presumably names the slot reserved by the dummy
C pushl of eax in the prologue; defframe is defined outside this file, so
C confirm there.
defframe(VAR_COUNTER, -4)

	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_basecase)

	pushl	%eax			C dummy push for allocating stack slot
	pushl	%esi
	pushl	%ebp
	pushl	%edi
deflit(`FRAME',16)

	movl	PARAM_XP,%esi
	movl	PARAM_WP,%edi
	movl	PARAM_YP,%ebp

	C wp[0] is produced before the size test, so the one-limb exit only
	C has to store the high half of this product.
	movl	(%esi),%eax		C load xp[0]
	mull	(%ebp)			C multiply by yp[0]
	movl	%eax,(%edi)		C store to wp[0]
	movl	PARAM_XSIZE,%ecx	C xsize
	decl	%ecx			C If xsize = 1, ysize = 1 too
	jz	L(done)

	C ebx is saved only on this path; the L(done) exit above never pushes it.
	C NOTE(review): FRAME_pushl presumably adjusts FRAME for the extra push
	C so the PARAM_ and VAR_ offsets stay valid; it is defined outside this
	C file.
	movl	PARAM_XSIZE,%eax
	pushl	%ebx
FRAME_pushl()
	movl	%edx,%ebx		C high half of xp[0]*yp[0] is the first carry limb
	leal	(%esi,%eax,4),%esi	C make xp point at end
	leal	(%edi,%eax,4),%edi	C offset wp by xsize
	negl	%ecx			C negate j size/index for inner loop
	C ecx is now -(xsize-1), so the loop below covers limbs 1 to xsize-1.
	C negl leaves CF set for a non-zero operand, hence the xorl comes
	C after it.
	xorl	%eax,%eax		C clear carry

C First row: wp[j] = low half of xp[j]*yp[0] plus the carry limb, for
C j = 1 to xsize-1.  CF produced by the addl is kept across the loop branch
C (movl, incl and jnz leave CF alone) and absorbed by the adcl at the top of
C the next pass.
	ALIGN(8)
L(oop1):	adcl	$0,%ebx			C absorb CF left by the previous addl
	movl	(%esi,%ecx,4),%eax	C load next limb at xp[j]
	mull	(%ebp)
	addl	%ebx,%eax		C low half plus carry limb, sets CF
	movl	%eax,(%edi,%ecx,4)
	incl	%ecx			C sets ZF for the jnz, leaves CF alone
	movl	%edx,%ebx		C high half becomes the next carry limb
	jnz	L(oop1)

	adcl	$0,%ebx			C absorb CF of the final addl
	movl	PARAM_YSIZE,%eax
	movl	%ebx,(%edi)		C most significant limb of product
	addl	$4,%edi			C increment wp
	decl	%eax
	jz	L(skip)			C ysize = 1: the single row is complete
	movl	%eax,VAR_COUNTER	C rows still to do: ysize-1

C Remaining rows: each pass takes the next y limb, adds xp[j] times that
C limb into the wp limbs already written (all xsize values of j), and then
C stores the carry-out as a new top limb.
L(outer):
	addl	$4,%ebp			C make ebp point to next y limb
	movl	PARAM_XSIZE,%ecx
	negl	%ecx			C j = -xsize, every x limb takes part
	xorl	%ebx,%ebx		C zero the carry limb and clear CF

	C code at 0x61 here, close enough to aligned
L(oop2):
	adcl	$0,%ebx			C absorb CF left by the previous wp addition
	movl	(%esi,%ecx,4),%eax	C next limb of xp
	mull	(%ebp)
	addl	%ebx,%eax		C add the incoming carry limb to the low half
	movl	(%edi,%ecx,4),%ebx	C wp limb written by an earlier row
	adcl	$0,%edx			C carry of the low-half add goes into the high half
	addl	%eax,%ebx		C accumulate into the wp limb, sets CF
	movl	%ebx,(%edi,%ecx,4)
	incl	%ecx			C sets ZF for the jnz, leaves CF alone
	movl	%edx,%ebx		C high half becomes the next carry limb
	jnz	L(oop2)

	adcl	$0,%ebx			C absorb CF of the final wp addition

	movl	%ebx,(%edi)		C top limb produced by this row
	addl	$4,%edi			C next row lands one limb higher
	movl	VAR_COUNTER,%eax
	decl	%eax
	movl	%eax,VAR_COUNTER	C movl keeps the ZF set by decl
	jnz	L(outer)

C Common exit for xsize > 1: restore the four saved registers and release
C the dummy slot.
L(skip):
	popl	%ebx
	popl	%edi
	popl	%ebp
	popl	%esi
	addl	$4,%esp			C drop the slot reserved by the dummy push
	ret

C Exit for xsize = 1.  It is reached before ebx is pushed, so only three
C registers and the dummy slot are popped.
L(done):
	movl	%edx,4(%edi)		C store to wp[1]
	popl	%edi
	popl	%ebp
	popl	%esi
	popl	%eax			C dummy pop for deallocating stack slot
	ret

EPILOGUE()