1dnl  Intel P5 mpn_mod_34lsub1 -- mpn remainder modulo 2**24-1.
2
3dnl  Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
4dnl
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or
8dnl  modify it under the terms of the GNU Lesser General Public License as
9dnl  published by the Free Software Foundation; either version 3 of the
10dnl  License, or (at your option) any later version.
11dnl
12dnl  The GNU MP Library is distributed in the hope that it will be useful,
13dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
14dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15dnl  Lesser General Public License for more details.
16dnl
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C P5: 1.66 cycles/limb
24
25
26C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
27C
C Returns in eax a value congruent, modulo 2^24-1, to the number made of the
C size 32-bit limbs at src, least significant limb first.  The result is a
C sum of partial pieces and is not reduced below 2^24-1; for size==1 the
C single limb is returned unchanged.
C
C NOTE(review): src[0] is read unconditionally on the short path, so callers
C presumably guarantee size >= 1 -- confirm against the callers.
28
C Stack arguments.  NOTE(review): the numbers are presumably byte offsets
C from the return address, adjusted through FRAME by the defframe macro from
C the included m4 definitions -- confirm there.
29defframe(PARAM_SIZE, 8)
30defframe(PARAM_SRC,  4)
31
C NOTE(review): TEXT and ALIGN are macros defined outside this file,
C presumably selecting the code section and 16-byte entry alignment.
32	TEXT
33	ALIGN(16)
34PROLOGUE(mpn_mod_34lsub1)
35deflit(`FRAME',0)
36
	C Method: limbs are 32 bits, and modulo 2^24-1 we have 2^24 == 1, so
	C 2^32 == 2^8, 2^64 == 2^16 and 2^96 == 1.  Limb weights therefore
	C repeat with period three.  Sizes 1 and 2 are handled inline below;
	C larger sizes sum every third limb into one of three accumulators and
	C fold the three sums together at the combine label.
	C
	C NOTE(review): instructions are laid out in pairs separated by blank
	C lines, presumably for P5 dual issue -- keep the ordering when editing.

37	movl	PARAM_SIZE, %ecx
38	movl	PARAM_SRC, %edx
39
40	subl	$2, %ecx		C ecx = size-2, flags feed both jumps below
41	ja	L(three_or_more)	C taken when size > 2 as unsigned
42
43	movl	(%edx), %eax		C eax = src[0], movl leaves the flags alone
44	jne	L(one)			C size-2 != 0: return src[0] unchanged
45
46
	C size == 2.  Fold src[0] as low 24 bits plus high 8 bits, and src[1]
	C (weight 2^32 == 2^8) as low 16 bits shifted up 8 plus high 16 bits.

47	movl	4(%edx), %ecx		C ecx = src[1]
48	movl	%eax, %edx
49
50	shrl	$24, %edx		C src[0] high 8 bits, weight 2^24 == 1
51	andl	$0xFFFFFF, %eax		C src[0] low 24 bits
52
53	addl	%edx, %eax
54	movl	%ecx, %edx
55
56	shrl	$16, %ecx		C src[1] high 16 bits, weight 2^48 == 1
57	andl	$0xFFFF, %edx		C src[1] low 16 bits
58
59	shll	$8, %edx		C src[1] low bits at weight 2^8
60	addl	%ecx, %eax
61
62	addl	%edx, %eax
63
64L(one):
65	ret
66
67
68L(three_or_more):
69	C eax
70	C ebx
71	C ecx	size-2
72	C edx	src
73	C esi
74	C edi
75	C ebp
76
	C Preserve the four registers used as scratch and accumulators; they
	C are popped in reverse order just before the final ret.

77	pushl	%ebx	FRAME_pushl()
78	pushl	%esi	FRAME_pushl()
79
80	pushl	%edi	FRAME_pushl()
81	pushl	%ebp	FRAME_pushl()
82
83	xorl	%esi, %esi		C 0mod3
84	xorl	%edi, %edi		C 1mod3
85
86	xorl	%ebp, %ebp		C 2mod3, and clear carry
87
88L(top):
89	C eax	scratch
90	C ebx	scratch
91	C ecx	counter, limbs
92	C edx	src
93	C esi	0mod3
94	C edi	1mod3
95	C ebp	2mod3
96
	C Each pass adds three limbs, one per accumulator, with the carry
	C chained through all three adcl.  The carry out of the 2mod3 add has
	C weight 2^96 == 1, so it is left in the carry flag and picked up by
	C the 0mod3 adcl of the next pass.  Only movl, leal, decl and jg sit
	C between the adcl instructions, and none of them changes the carry
	C flag; that is why the pointer and counter are stepped with leal and
	C decl rather than addl and subl.

97	movl	(%edx), %eax
98	movl	4(%edx), %ebx
99
100	adcl	%eax, %esi
101	movl	8(%edx), %eax
102
103	adcl	%ebx, %edi
104	leal	12(%edx), %edx		C src += 3 limbs, flags untouched
105
106	adcl	%eax, %ebp
107	leal	-2(%ecx), %ecx		C counter -= 2, flags untouched
108
109	decl	%ecx			C counter -= 1, sets flags but keeps carry
110	jg	L(top)
111
112
113	C ecx is -2, -1 or 0, representing 0, 1 or 2 more limbs, respectively
114
	C ebx is set to minus the weight of whichever carry is still pending
	C when the combine code is reached:
	C   0xFFFFFFFF = -1     carry out of 2mod3, weight 2^96 == 1
	C   0xFFFFFF00 = -2^8   carry out of 0mod3, weight 2^32 == 2^8
	C   0xFFFF0000 = -2^16  carry out of 1mod3, weight 2^64 == 2^16
	C incl and decl set the sign flag for js while leaving carry intact.

115	movl	$0xFFFFFFFF, %ebx	C mask
116	incl	%ecx
117
118	js	L(combine)		C 0 more
119
120	movl	(%edx), %eax
121	movl	$0xFFFFFF00, %ebx
122
123	adcl	%eax, %esi
124	decl	%ecx
125
126	js	L(combine)		C 1 more
127
128	movl	4(%edx), %eax
129	movl	$0xFFFF0000, %ebx
130
131	adcl	%eax, %edi
132
133
134
135L(combine):
136	C eax
137	C ebx	mask
138	C ecx
139	C edx
140	C esi	0mod3
141	C edi	1mod3
142	C ebp	2mod3
143
	C Fold the three accumulators and the pending carry into eax.  The
	C sbbl must come first since the andl after it overwrites the carry
	C flag.  Each of the seven terms added is below 2^24, so the total
	C cannot overflow 32 bits.

144	sbbl	%ecx, %ecx		C carry
145	movl	%esi, %eax		C 0mod3
146
147	andl	%ebx, %ecx		C masked for position
148	andl	$0xFFFFFF, %eax		C 0mod3 low
149
150	shrl	$24, %esi		C 0mod3 high
151	subl	%ecx, %eax		C apply carry
152
153	addl	%esi, %eax		C apply 0mod3
154	movl	%edi, %ebx		C 1mod3
155
156	shrl	$16, %edi		C 1mod3 high
157	andl	$0x0000FFFF, %ebx
158
159	shll	$8, %ebx		C 1mod3 low
160	addl	%edi, %eax		C apply 1mod3 high
161
162	addl	%ebx, %eax		C apply 1mod3 low
163	movl	%ebp, %ebx		C 2mod3
164
165	shrl	$8, %ebp		C 2mod3 high
166	andl	$0xFF, %ebx
167
168	shll	$16, %ebx		C 2mod3 low
169	addl	%ebp, %eax		C apply 2mod3 high
170
171	addl	%ebx, %eax		C apply 2mod3 low
172
173	popl	%ebp
174	popl	%edi
175
176	popl	%esi
177	popl	%ebx
178
179	ret
180
181EPILOGUE()
182