dnl  bdiv_dbm1c.asm revision 1.1.1.3
dnl  x86 mpn_bdiv_dbm1.

dnl  Copyright 2008, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

31include(`../config.m4')
32
C			    cycles/limb
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)		 5.1
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)	13.67
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom
C AMD K6
C AMD K7			 3.5
C AMD K8
C AMD K10


C TODO
C  * Optimize for more x86 processors

ASM_START()
	TEXT
	ALIGN(16)

C mpn_bdiv_dbm1c -- 32-bit x86, all arguments passed on the stack.
C
C Stack arguments at entry (return address at 0(%esp)):
C    4(%esp)  qp   destination limb pointer
C    8(%esp)  ap   source limb pointer
C   12(%esp)  n    limb count
C   16(%esp)  d    32-bit multiplier
C   20(%esp)       initial value of the running word kept in %ebx
C
C For i = 0 .. n-1, with hi:lo the 64-bit product ap[i] * d:
C	ebx -= lo
C	qp[i] = ebx
C	ebx -= hi + borrow out of the first subtraction
C The final %ebx is returned in %eax.
C
C The first limb is processed before n is examined, so n is assumed to be
C at least 1.
C
C Register use:
C   %esi  source cursor		%edi  destination cursor
C   %ecx  d			%ebp  loop counter
C   %ebx  running word		%eax, %edx  mul operand and product
C
C The main loop handles four limbs per pass.  After the first limb, n mod 4
C selects the point at which the loop is entered.  Each entry stub biases
C %esi, %edi and %ebp so that the fixed displacements inside the loop address
C the next unprocessed limb.
C
C Between every sub and its matching sbb there are only mov and lea
C instructions, which do not modify the flags, so the borrow survives.

PROLOGUE(mpn_bdiv_dbm1c)
C Argument loads are interleaved with the register saves, so each offset is
C the entry offset plus 4 for every push already executed.
	mov	16(%esp), %ecx		C d
	push	%esi
	mov	12(%esp), %esi		C ap
	push	%edi
	mov	12(%esp), %edi		C qp
	push	%ebp
	mov	24(%esp), %ebp		C n
	push	%ebx

C First limb.
	mov	(%esi), %eax		C ap[0]
	mul	%ecx			C edx:eax = ap[0] * d
	mov	36(%esp), %ebx		C fifth argument (20 + four pushes)
	sub	%eax, %ebx
	mov	%ebx, (%edi)		C qp[0]
	sbb	%edx, %ebx

C Dispatch on n mod 4.
	mov	%ebp, %eax
	and	$3, %eax
	jz	L(b0)			C n mod 4 = 0
	cmp	$2, %eax
	jc	L(b1)			C n mod 4 = 1
	jz	L(b2)			C n mod 4 = 2

C n mod 4 = 3: two limbs are done by the loop tail before the first full pass.
L(b3):	lea	-8(%esi), %esi		C so that 12(%esi) is ap[1]
	lea	8(%edi), %edi		C so that -4(%edi) is qp[1]
	add	$-3, %ebp
	jmp	L(3)

C n mod 4 = 0: three limbs are done by the loop tail.
L(b0):	mov	4(%esi), %eax		C preload ap[1] for the mul at the entry
	lea	-4(%esi), %esi		C so that 12(%esi) is ap[2]
	lea	12(%edi), %edi		C so that -8(%edi) is qp[1]
	add	$-4, %ebp
	jmp	L(0)

C n mod 4 = 2: one limb is done by the loop tail.
L(b2):	mov	4(%esi), %eax		C preload ap[1] for the mul at the entry
	lea	4(%esi), %esi
	lea	4(%edi), %edi		C so that 0(%edi) is qp[1]
	add	$-2, %ebp
	jmp	L(2)

	ALIGN(8)
C Main loop, four limbs per pass.  The load of the next source limb is issued
C ahead of the sbb that finishes the current one.
L(top):	mov	4(%esi), %eax
	mul	%ecx
	lea	16(%edi), %edi		C advance destination by four limbs
	sub	%eax, %ebx
	mov	8(%esi), %eax		C next source limb
	mov	%ebx, -12(%edi)
	sbb	%edx, %ebx
L(0):	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, -8(%edi)
	sbb	%edx, %ebx
L(3):	mov	12(%esi), %eax
	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, -4(%edi)
	mov	16(%esi), %eax		C next source limb
	lea	16(%esi), %esi		C advance source by four limbs
	sbb	%edx, %ebx
L(2):	mul	%ecx
	sub	%eax, %ebx
	mov	%ebx, 0(%edi)
	sbb	%edx, %ebx
C Loop control, also the entry for n mod 4 = 1.  Another pass is made while
C the counter stays non-negative after subtracting 4.
L(b1):	add	$-4, %ebp
	jns	L(top)

C Return the running word and restore the saved registers in reverse order.
	mov	%ebx, %eax
	pop	%ebx
	pop	%ebp
	pop	%edi
	pop	%esi
	ret
EPILOGUE()
130