1dnl x86 mpn_bdiv_q_1 -- mpn by limb exact division. 2 3dnl Rearranged from mpn/x86/dive_1.asm by Marco Bodrato. 4 5dnl Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc. 6 7dnl This file is part of the GNU MP Library. 8dnl 9dnl The GNU MP Library is free software; you can redistribute it and/or modify 10dnl it under the terms of either: 11dnl 12dnl * the GNU Lesser General Public License as published by the Free 13dnl Software Foundation; either version 3 of the License, or (at your 14dnl option) any later version. 15dnl 16dnl or 17dnl 18dnl * the GNU General Public License as published by the Free Software 19dnl Foundation; either version 2 of the License, or (at your option) any 20dnl later version. 21dnl 22dnl or both in parallel, as here. 23dnl 24dnl The GNU MP Library is distributed in the hope that it will be useful, but 25dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 27dnl for more details. 28dnl 29dnl You should have received copies of the GNU General Public License and the 30dnl GNU Lesser General Public License along with the GNU MP Library. If not, 31dnl see https://www.gnu.org/licenses/. 

include(`../config.m4')


C		    cycles/limb
C P54		30.0
C P55		29.0
C P6		13.0 odd divisor, 12.0 even (strangely)
C K6		14.0
C K7		12.0
C P4		42.0

MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)

C Stack offsets of the arguments.  PARAM_INVERSE and PARAM_SHIFT are read
C only by the mpn_pi1_bdiv_q_1 entry point; mpn_bdiv_q_1 computes those two
C values itself and keeps them in registers.
defframe(PARAM_SHIFT,  24)
defframe(PARAM_INVERSE,20)
defframe(PARAM_DIVISOR,16)
defframe(PARAM_SIZE,   12)
defframe(PARAM_SRC,    8)
defframe(PARAM_DST,    4)

dnl  re-use parameter space
dnl  The src pointer is copied into esi before this slot is overwritten with
dnl  the inverse, so the aliasing is safe.
define(VAR_INVERSE,`PARAM_SRC')

	TEXT

C mp_limb_t
C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
C		    mp_limb_t inverse, int shift)
C
C Exact division of the size-limb operand at src by a single limb, using a
C precomputed inverse.  For every limb the code forms
C	q = (src shifted right by shift, minus the running carry) * inverse
C stores q at dst, and takes the high half of q * divisor as the next carry
C limb.  The divisor is used only as a multiplier, and src is shifted right
C by the shift argument while it is read.
C
C NOTE(review): eax at the ret holds the last quotient limb stored; whether
C any caller relies on a return value is not visible in this file.

	ALIGN(16)
PROLOGUE(mpn_pi1_bdiv_q_1)
deflit(`FRAME',0)

	movl	PARAM_SHIFT, %ecx
	pushl	%ebp	FRAME_pushl()

	movl	PARAM_INVERSE, %eax
	movl	PARAM_SIZE, %ebp
	pushl	%ebx	FRAME_pushl()

C Shared body, also entered by a jmp from mpn_bdiv_q_1.  State required on
C arrival: eax = inverse, ecx = shift, ebp = size, PARAM_DIVISOR = multiplier
C for the carry computation, and ebp then ebx already pushed (FRAME = 8).
L(common):
	pushl	%edi	FRAME_pushl()
	pushl	%esi	FRAME_pushl()

	movl	PARAM_SRC, %esi
	movl	PARAM_DST, %edi

C Point both pointers one past the last limb and count ebp up from -size
C towards zero, so a single register is both index and loop counter.
	leal	(%esi,%ebp,4), %esi	C src end
	leal	(%edi,%ebp,4), %edi	C dst end
	negl	%ebp			C -size

	movl	%eax, VAR_INVERSE
	movl	(%esi,%ebp,4), %eax	C src[0]

C No carry bit and no carry limb before the first quotient limb.
	xorl	%ebx, %ebx
	xorl	%edx, %edx

C ebp becomes zero here exactly when size is 1; the single limb is then
C handled by the tail code with zero carries.
	incl	%ebp
	jz	L(one)

	movl	(%esi,%ebp,4), %edx	C src[1]

C Double-limb right shift: eax gets src[0] shifted right by cl with the
C vacated high bits filled from src[1].
	shrdl(	%cl, %edx, %eax)

	movl	VAR_INVERSE, %edx
	jmp	L(entry)


	ALIGN(8)
	nop	C k6 code alignment
	nop
L(top):
	C eax	q
	C ebx	carry bit, 0 or -1
	C ecx	shift
	C edx	carry limb
	C esi	src end
	C edi	dst end
	C ebp	counter, limbs, negative

	movl	-4(%esi,%ebp,4), %eax
	subl	%ebx, %edx		C accumulate carry bit

	movl	(%esi,%ebp,4), %ebx

C Current limb shifted right by cl, high bits filled from the next limb.
	shrdl(	%cl, %ebx, %eax)

	subl	%edx, %eax		C apply carry limb
	movl	VAR_INVERSE, %edx

C Capture the borrow of the subl above as 0 or -1.  This must come before
C the multiplies below, which overwrite the carry flag.
	sbbl	%ebx, %ebx

L(entry):
C Low 32 bits of limb * inverse is the quotient limb.
	imull	%edx, %eax

	movl	%eax, -4(%edi,%ebp,4)
	movl	PARAM_DIVISOR, %edx

C edx:eax = q * divisor; the high half left in edx is the next carry limb.
	mull	%edx

	incl	%ebp
	jnz	L(top)


C Last limb: there is no higher limb to shift bits in from, so a plain
C logical shift is used instead of shrdl.
	movl	-4(%esi), %eax		C src high limb
L(one):
	shrl	%cl, %eax
	popl	%esi	FRAME_popl()

	addl	%ebx, %eax		C apply carry bit

	subl	%edx, %eax		C apply carry limb

C VAR_INVERSE is still addressed correctly here because FRAME_popl above
C has accounted for the early pop of esi.
	imull	VAR_INVERSE, %eax

	movl	%eax, -4(%edi)

	popl	%edi
	popl	%ebx
	popl	%ebp

	ret

EPILOGUE()

C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C			  mp_limb_t divisor);
C
C Entry point without a precomputed inverse.  It strips the trailing zero
C bits from the divisor (their count becomes the shift), computes the
C inverse of the remaining odd value, writes that odd value back into
C PARAM_DIVISOR, and jumps into the shared body at L(common) with
C eax = inverse, ecx = shift, ebp = size.
C
C NOTE: a zero divisor never leaves the strip_twos loop, since no 1 bit is
C ever shifted out.

	ALIGN(16)
PROLOGUE(mpn_bdiv_q_1)
deflit(`FRAME',0)

	movl	PARAM_DIVISOR, %eax
	pushl	%ebp	FRAME_pushl()

	movl	$-1, %ecx		C shift count
	movl	PARAM_SIZE, %ebp

	pushl	%ebx	FRAME_pushl()

C Shift the divisor right until a 1 bit falls out into the carry flag.
C On exit ecx is the number of trailing zero bits and eax is the divisor
C shifted right by ecx+1.
L(strip_twos):
	incl	%ecx

	shrl	%eax
	jnc	L(strip_twos)

C Put back the 1 bit that was shifted out: ebx = 2*eax + 1.
	leal	1(%eax,%eax), %ebx	C d without twos
	andl	$127, %eax		C d/2, 7 bits

ifdef(`PIC',`
	LEA(	binvert_limb_table, %edx)
	movzbl	(%eax,%edx), %eax		C inv 8 bits
',`
	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
')

C Two rounds of inv = 2*inv - inv*inv*d refine the 8-bit table value; the
C ASSERT below states the expected result, d*inv == 1 mod 2^GMP_LIMB_BITS.
C The first round leaves its result in edx, the second in eax.
	leal	(%eax,%eax), %edx	C 2*inv
	movl	%ebx, PARAM_DIVISOR	C d without twos
	imull	%eax, %eax		C inv*inv
	imull	%ebx, %eax		C inv*inv*d
	subl	%eax, %edx		C inv = 2*inv - inv*inv*d

	leal	(%edx,%edx), %eax	C 2*inv
	imull	%edx, %edx		C inv*inv
	imull	%ebx, %edx		C inv*inv*d
	subl	%edx, %eax		C inv = 2*inv - inv*inv*d

	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
	pushl	%eax	FRAME_pushl()
	imull	PARAM_DIVISOR, %eax
	cmpl	$1, %eax
	popl	%eax	FRAME_popl()')

C Continue in the shared body; ebp and ebx were pushed above in the same
C order as in mpn_pi1_bdiv_q_1, so the stack layouts agree.
	jmp	L(common)
EPILOGUE()
ASM_END()