dnl  dive_1.asm revision 1.1.1.1
dnl  x86 mpn_divexact_1 -- mpn by limb exact division.

dnl  Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C     cycles/limb
C P54    30.0
C P55    29.0
C P6     13.0   odd divisor, 12.0 even (strangely)
C K6     14.0
C K7     12.0
C P4     42.0


C mp_limb_t mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                           mp_limb_t divisor);
C
C Divide the size-limb number at src by divisor and store the size-limb
C quotient at dst.  The division is expected to be exact; no remainder is
C produced or checked.
C
C Syntax: AT&T operand order, run through m4 (config.m4) before assembly.
C Arguments are read from the stack at 4..16(%esp) on entry; limbs are
C 4 bytes.  %ebx, %esi, %edi and %ebp are saved and restored; %eax, %ecx,
C %edx and the flags are clobbered.
C
C Method:
C   1. Strip the trailing zero bits from divisor, leaving an odd d and a
C      shift count in %ecx.
C   2. Compute inv with d*inv == 1 mod 2^32: an 8-bit starting value from
C      binvert_limb_table, then two steps of inv = 2*inv - inv*inv*d
C      (8 -> 16 -> 32 bits).
C   3. For each limb from low to high: take the source limb shifted right
C      by %cl, subtract the carry from the previous limb, multiply by inv
C      to get the quotient limb q, and take the high half of q*d as the
C      next carry limb.
C
C Requirements visible from the code:
C   - divisor must be nonzero, otherwise the strip_twos loop never sees a
C     1 bit and does not terminate.
C   - size must be at least 1, since src[0] is loaded unconditionally.
C
C Side effect: the stack slots of the src and divisor arguments are
C overwritten (with inv and with d respectively).
C
C NOTE(review): the prototype above says mp_limb_t, but nothing is set up
C as a return value; at ret %eax simply holds the last quotient limb
C stored.  Confirm the intended return type against the C declaration.
C
C NOTE(review): FRAME_pushl() and FRAME_popl() come from outside this file;
C presumably they keep the m4 FRAME offset in step with %esp so that the
C PARAM_xxx names stay valid after each push and pop -- confirm.
C
C The interleaving of independent instructions is kept exactly as found;
C presumably it is tuned for the processors in the table above.

defframe(PARAM_DIVISOR,16)
defframe(PARAM_SIZE,   12)
defframe(PARAM_SRC,    8)
defframe(PARAM_DST,    4)

dnl  re-use parameter space
define(VAR_INVERSE,`PARAM_SRC')

	TEXT

	ALIGN(16)
PROLOGUE(mpn_divexact_1)
deflit(`FRAME',0)

	movl	PARAM_DIVISOR, %eax
	pushl	%ebp	FRAME_pushl()

	movl	PARAM_SIZE, %ebp
	pushl	%edi	FRAME_pushl()

	pushl	%ebx	FRAME_pushl()
	movl	$-1, %ecx		C shift count

	pushl	%esi	FRAME_pushl()

	C Shift divisor right until a 1 bit falls out into the carry flag.
	C On exit %ecx is the number of trailing zero bits and %eax holds
	C the divisor shifted right by %ecx+1.
L(strip_twos):
	incl	%ecx

	shrl	%eax
	jnc	L(strip_twos)

	leal	1(%eax,%eax), %ebx	C d without twos
	andl	$127, %eax		C d/2, 7 bits

ifdef(`PIC',`
	LEA(	binvert_limb_table, %edx)
	movzbl	(%eax,%edx), %eax		C inv 8 bits
',`
	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
')

	C First refinement step, interleaved with pointer setup.
	leal	(%eax,%eax), %edx	C 2*inv
	movl	%ebx, PARAM_DIVISOR	C d without twos

	imull	%eax, %eax		C inv*inv

	C Must read PARAM_SRC here, before VAR_INVERSE (the same stack
	C slot) is written further down.
	movl	PARAM_SRC, %esi
	movl	PARAM_DST, %edi

	imull	%ebx, %eax		C inv*inv*d

	subl	%eax, %edx		C inv = 2*inv - inv*inv*d

	C Second refinement step.
	leal	(%edx,%edx), %eax	C 2*inv

	imull	%edx, %edx		C inv*inv

	C Point at the ends of the operands and count %ebp up from -size
	C towards zero, so that incl/jnz serves as the loop test.
	leal	(%esi,%ebp,4), %esi	C src end
	leal	(%edi,%ebp,4), %edi	C dst end
	negl	%ebp			C -size

	imull	%ebx, %edx		C inv*inv*d

	subl	%edx, %eax		C inv = 2*inv - inv*inv*d

	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
	pushl	%eax	FRAME_pushl()
	imull	PARAM_DIVISOR, %eax
	cmpl	$1, %eax
	popl	%eax	FRAME_popl()')

	movl	%eax, VAR_INVERSE
	movl	(%esi,%ebp,4), %eax	C src[0]

	xorl	%ebx, %ebx		C no carry bit yet
	xorl	%edx, %edx		C no carry limb yet

	incl	%ebp
	jz	L(one)			C size==1, only the final step

	movl	(%esi,%ebp,4), %edx	C src[1]

	shrdl(	%cl, %edx, %eax)	C low limb of src[1]:src[0] >> shift

	movl	VAR_INVERSE, %edx
	jmp	L(entry)


	ALIGN(8)
	nop	C k6 code alignment
	nop
L(top):
	C eax	q
	C ebx	carry bit, 0 or -1
	C ecx	shift
	C edx	carry limb
	C esi	src end
	C edi	dst end
	C ebp	counter, limbs, negative
	C
	C Each pass handles the source limb at -4(%esi,%ebp,4), taking its
	C upper bits after the shift from the next limb at (%esi,%ebp,4).

	movl	-4(%esi,%ebp,4), %eax
	subl	%ebx, %edx		C accumulate carry bit

	movl	(%esi,%ebp,4), %ebx

	shrdl(	%cl, %ebx, %eax)

	subl	%edx, %eax		C apply carry limb
	movl	VAR_INVERSE, %edx

	sbbl	%ebx, %ebx		C borrow from the subl, as 0 or -1

L(entry):
	imull	%edx, %eax		C q = limb * inv

	movl	%eax, -4(%edi,%ebp,4)	C store quotient limb
	movl	PARAM_DIVISOR, %edx	C d without twos

	mull	%edx			C edx = high limb of q*d, next carry limb

	incl	%ebp
	jnz	L(top)


	C Final limb: nothing above it to shift in, so a plain shrl is used.
	movl	-4(%esi), %eax		C src high limb
L(one):
	shrl	%cl, %eax
	popl	%esi	FRAME_popl()

	addl	%ebx, %eax		C apply carry bit
	popl	%ebx	FRAME_popl()

	subl	%edx, %eax		C apply carry limb

	imull	VAR_INVERSE, %eax

	movl	%eax, -4(%edi)		C dst high limb

	popl	%edi
	popl	%ebp

	ret

EPILOGUE()