dnl  x86 mpn_bdiv_dbm1.

dnl  Copyright 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C	    cycles/limb
C K7:		 3.5
C P4 m0:	 ?
C P4 m1:	 ?
C P4 m2:	13.67
C P4 m3:	 ?
C P4 m4:	 ?
C P6-13:	 5.1

C TODO
C  * Optimize for more x86 processors

C mpn_bdiv_dbm1c
C
C Syntax: AT&T operand order, 32-bit x86, all arguments taken from the stack.
C
C Stack arguments, as offsets from %esp on entry (before any push):
C    4	qp   destination limb pointer
C    8	ap   source limb pointer
C   12	n    limb count
C   16	d    32-bit multiplier
C   20	h    initial value of the running borrow word
C
C For each limb i, in increasing order, the 64-bit product ap[i] * d is
C formed and then
C	h     = h - low word of the product
C	qp[i] = h
C	h     = h - high word of the product - borrow from the line above
C The final h is returned in %eax.
C
C Limb 0 is read and written before n is looked at, so n = 0 is not handled.
C
C Register roles after the argument loads:
C	%esi  source pointer, biased per entry point
C	%edi  destination pointer, biased per entry point
C	%ebp  remaining count, biased so the loop continues while it stays >= 0
C	%ecx  d
C	%ebx  h
C	%eax, %edx  product scratch, overwritten by every mul
C
C %esi, %edi, %ebp and %ebx are saved on entry and restored before ret.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_bdiv_dbm1c)
	C Save the four registers used below, then fetch every argument
	C with a fixed displacement (entry offset + 16).
	push	%esi
	push	%edi
	push	%ebp
	push	%ebx

	mov	20(%esp), %edi		C qp
	mov	24(%esp), %esi		C ap
	mov	28(%esp), %ebp		C n
	mov	32(%esp), %ecx		C d
	mov	36(%esp), %ebx		C h

	C Limb 0 is handled ahead of the unrolled loop.
	mov	(%esi), %eax
	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, (%edi)		C mov leaves the borrow from sub intact
	sbb	%edx, %ebx

	C Select the entry point into the 4-way unrolled loop from n mod 4.
	mov	%ebp, %eax
	and	$3, %eax
	jz	L(n0)
	cmp	$2, %eax
	jc	L(n1)			C n mod 4 = 1
	jz	L(n2)			C n mod 4 = 2
					C otherwise n mod 4 = 3, fall through

L(n3):	lea	-8(%esi), %esi
	lea	8(%edi), %edi
	add	$-3, %ebp
	jmp	L(in3)

L(n0):	mov	4(%esi), %eax
	lea	-4(%esi), %esi
	lea	12(%edi), %edi
	add	$-4, %ebp
	jmp	L(in0)

L(n2):	mov	4(%esi), %eax
	lea	4(%esi), %esi
	lea	4(%edi), %edi
	add	$-2, %ebp
	jmp	L(in2)

	C Main loop: four limbs per iteration.  Each step is the same
	C sub / store / sbb sequence; the load for the following step is
	C issued early where the original schedule did so.
	ALIGN(8)
L(top):	mov	4(%esi), %eax
	mul	%ecx
	lea	16(%edi), %edi		C lea does not disturb flags
	sub	%eax, %ebx
	mov	8(%esi), %eax
	mov	%ebx, -12(%edi)
	sbb	%edx, %ebx
L(in0):	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, -8(%edi)
	sbb	%edx, %ebx
L(in3):	mov	12(%esi), %eax
	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, -4(%edi)
	mov	16(%esi), %eax
	lea	16(%esi), %esi
	sbb	%edx, %ebx
L(in2):	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, 0(%edi)
	sbb	%edx, %ebx
L(n1):	add	$-4, %ebp
	jns	L(top)			C continue while the count is not negative

	C Return the final borrow word and restore the saved registers.
	mov	%ebx, %eax
	pop	%ebx
	pop	%ebp
	pop	%edi
	pop	%esi
	ret
EPILOGUE()