1dnl  AMD64 logops.
2
3dnl  Copyright 2004, 2005, 2006 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C	     cycles/limb
24C K8,K9:	 1.5
25C K10:		 1.75-2 (fluctuating)
26C P4:		 2.8/3.35/3.60 (variant1/variant2/variant3)
27C P6-15:	 2.0
28
C Operation selection.  Each ifdef below fires when the corresponding
C OPERATION_xxx symbol is defined (NOTE(review): presumably exactly one is
C defined per build of this file, by machinery outside this file -- confirm).
C A branch defines three things:
C   func       the entry point name handed to PROLOGUE
C   VARIANT_k  which of the three code bodies further down gets assembled
C   LOGOP      the two-operand instruction applied to each pair of limbs
C The three variants differ only in where a complement is applied:
C   VARIANT_1  rp[i] = up[i] LOGOP vp[i]
C   VARIANT_2  rp[i] = up[i] LOGOP ~vp[i]
C   VARIANT_3  rp[i] = ~(up[i] LOGOP vp[i])

C AND family.
29ifdef(`OPERATION_and_n',`
30  define(`func',`mpn_and_n')
31  define(`VARIANT_1')
32  define(`LOGOP',`andq')')
33ifdef(`OPERATION_andn_n',`
34  define(`func',`mpn_andn_n')
35  define(`VARIANT_2')
36  define(`LOGOP',`andq')')
37ifdef(`OPERATION_nand_n',`
38  define(`func',`mpn_nand_n')
39  define(`VARIANT_3')
40  define(`LOGOP',`andq')')
C Inclusive-OR family.
41ifdef(`OPERATION_ior_n',`
42  define(`func',`mpn_ior_n')
43  define(`VARIANT_1')
44  define(`LOGOP',`orq')')
45ifdef(`OPERATION_iorn_n',`
46  define(`func',`mpn_iorn_n')
47  define(`VARIANT_2')
48  define(`LOGOP',`orq')')
49ifdef(`OPERATION_nior_n',`
50  define(`func',`mpn_nior_n')
51  define(`VARIANT_3')
52  define(`LOGOP',`orq')')
C Exclusive-OR family.  Only two entries are needed here: u xor ~v and
C ~(u xor v) are the same value, so xnor is built on VARIANT_2 and no
C VARIANT_3 form of xor is defined.
53ifdef(`OPERATION_xor_n',`
54  define(`func',`mpn_xor_n')
55  define(`VARIANT_1')
56  define(`LOGOP',`xorq')')
57ifdef(`OPERATION_xnor_n',`
58  define(`func',`mpn_xnor_n')
59  define(`VARIANT_2')
60  define(`LOGOP',`xorq')')
61
62
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file.  It is given
C the same eight names that the OPERATION_xxx branches can select as func, so
C it presumably announces the entry points this one source provides -- confirm.
63MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
64
65C INPUT PARAMETERS
C Symbolic names for the four argument registers, shared by all variants.
C rdi, rsi, rdx, rcx are the first four integer argument registers of the
C System V AMD64 calling convention, in that order.
C   rp  destination limb pointer (every result is stored through it)
C   up  first source limb pointer (used as the memory operand of LOGOP)
C   vp  second source limb pointer (loaded into r8/r9 before LOGOP)
C   n   limb count; its low two bits pick the entry point into the loop
66define(`rp',`%rdi')
67define(`up',`%rsi')
68define(`vp',`%rdx')
69define(`n',`%rcx')
70
71
72ASM_START()
73
C VARIANT_1: rp[i] = up[i] LOGOP vp[i] for each limb i.
C Registers: r8 and r9 hold vp limbs and then results, eax holds n mod 4,
C and rp, up, vp, n are modified in place.  The stack is not touched.
74ifdef(`VARIANT_1',`
75	TEXT
76	ALIGN(32)
77PROLOGUE(func)
C Preload vp limb 0 into r8.  This load happens before n is examined and
C there is no n = 0 check anywhere below, so at least one limb is assumed.
78	movq	(vp), %r8
C eax = low 32 bits of n; reduced to n mod 4 by the andl below.
79	movl	%ecx, %eax
C Advance all three pointers to one past their last limb, then negate n.
C From here on (ptr,n,8) addresses limb 0 and n counts upwards to zero.
80	leaq	(vp,n,8), vp
81	leaq	(up,n,8), up
82	leaq	(rp,n,8), rp
83	negq	n
C Dispatch on n mod 4:
C   0 -> b00, 1 -> b01 (cmpl sets carry), 2 -> b10, 3 -> b11 (fall through).
84	andl	$3, %eax
85	je	L(b00)
86	cmpl	$2, %eax
87	jc	L(b01)
88	je	L(b10)
89
C n mod 4 = 3: finish limb 0 from the preloaded r8, then bias n by -1 so
C that the second half of the loop body (offsets 16 and 24) covers limbs
C 1 and 2.
90L(b11):	LOGOP	(up,n,8), %r8
91	movq	%r8, (rp,n,8)
92	decq	n
93	jmp	L(e11)
C n mod 4 = 2: bias n by -2 so that offsets 16 and 24 address limbs 0 and 1.
C Entering at e10 skips the r8 load, because r8 already holds vp limb 0.
94L(b10):	addq	$-2, n
95	jmp	L(e10)
C n mod 4 = 1: finish limb 0 from the preloaded r8.  If n reaches zero that
C was the only limb; otherwise fall into the loop with a multiple of 4 left.
96L(b01):	LOGOP	(up,n,8), %r8
97	movq	%r8, (rp,n,8)
98	incq	n
99	jz	L(ret)
100
C Main loop, four limbs per iteration, two at a time through r8 and r9.
C Entry at b00 skips the first r8 load (r8 was preloaded in the prologue).
101L(oop):	movq	(vp,n,8), %r8
102L(b00):	movq	8(vp,n,8), %r9
103	LOGOP	(up,n,8), %r8
104	LOGOP	8(up,n,8), %r9
C NOTE(review): this nop has no architectural effect; it is presumably
C here for alignment or decode scheduling -- confirm before removing.
105	nop
106	movq	%r8, (rp,n,8)
107	movq	%r9, 8(rp,n,8)
C Second half of the loop body; also the entry point for the b11 (e11) and
C b10 (e10) cases.
108L(e11):	movq	16(vp,n,8), %r8
109L(e10):	movq	24(vp,n,8), %r9
110	LOGOP	16(up,n,8), %r8
111	LOGOP	24(up,n,8), %r9
112	movq	%r8, 16(rp,n,8)
113	movq	%r9, 24(rp,n,8)
C n is negative while limbs remain.  The add produces a carry only when n
C wraps up to zero, so jnc keeps looping until every limb has been written.
114	addq	$4, n
115	jnc	L(oop)
116L(ret):	ret
117EPILOGUE()
118')
119
C VARIANT_2: rp[i] = up[i] LOGOP ~vp[i] for each limb i.
C Same structure as the plain variant, except that every vp limb is
C complemented with notq right after it is loaded and before LOGOP is applied.
C Registers: r8 and r9 hold complemented vp limbs and then results, eax
C holds n mod 4, and rp, up, vp, n are modified in place.
120ifdef(`VARIANT_2',`
121	TEXT
122	ALIGN(32)
123PROLOGUE(func)
C Preload and complement vp limb 0.  This load happens before n is examined
C and there is no n = 0 check below, so at least one limb is assumed.
124	movq	(vp), %r8
125	notq	%r8
C eax = low 32 bits of n; reduced to n mod 4 by the andl below.
126	movl	%ecx, %eax
C Advance all three pointers to one past their last limb, then negate n.
C From here on (ptr,n,8) addresses limb 0 and n counts upwards to zero.
127	leaq	(vp,n,8), vp
128	leaq	(up,n,8), up
129	leaq	(rp,n,8), rp
130	negq	n
C Dispatch on n mod 4:
C   0 -> b00, 1 -> b01 (cmpl sets carry), 2 -> b10, 3 -> b11 (fall through).
131	andl	$3, %eax
132	je	L(b00)
133	cmpl	$2, %eax
134	jc	L(b01)
135	je	L(b10)
136
C n mod 4 = 3: finish limb 0 from the preloaded r8, then bias n by -1 so
C that the second half of the loop body (offsets 16 and 24) covers limbs
C 1 and 2.
137L(b11):	LOGOP	(up,n,8), %r8
138	movq	%r8, (rp,n,8)
139	decq	n
140	jmp	L(e11)
C n mod 4 = 2: bias n by -2 so that offsets 16 and 24 address limbs 0 and 1.
C Entering at e10 skips the r8 load and its notq, because r8 already holds
C the complemented vp limb 0.
141L(b10):	addq	$-2, n
142	jmp	L(e10)
C Thirteen one-byte NOPs (0x90).  They follow an unconditional jmp and sit
C before a label, so they are never reached by fall-through.
C NOTE(review): presumably padding that places the code below at a chosen
C alignment -- confirm before changing the count or the code above.
143	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
C n mod 4 = 1: finish limb 0 from the preloaded r8.  If n reaches zero that
C was the only limb; otherwise fall into the loop with a multiple of 4 left.
144L(b01):	LOGOP	(up,n,8), %r8
145	movq	%r8, (rp,n,8)
146	incq	n
147	jz	L(ret)
148
C Main loop, four limbs per iteration, two at a time through r8 and r9.
C Entry at b00 skips the first load and notq (done in the prologue).
149L(oop):	movq	(vp,n,8), %r8
150	notq	%r8
151L(b00):	movq	8(vp,n,8), %r9
152	notq	%r9
153	LOGOP	(up,n,8), %r8
154	LOGOP	8(up,n,8), %r9
155	movq	%r8, (rp,n,8)
156	movq	%r9, 8(rp,n,8)
C Second half of the loop body; also the entry point for the b11 (e11) and
C b10 (e10) cases.
157L(e11):	movq	16(vp,n,8), %r8
158	notq	%r8
159L(e10):	movq	24(vp,n,8), %r9
160	notq	%r9
161	LOGOP	16(up,n,8), %r8
162	LOGOP	24(up,n,8), %r9
163	movq	%r8, 16(rp,n,8)
164	movq	%r9, 24(rp,n,8)
C n is negative while limbs remain.  The add produces a carry only when n
C wraps up to zero, so jnc keeps looping until every limb has been written.
165	addq	$4, n
166	jnc	L(oop)
167L(ret):	ret
168EPILOGUE()
169')
170
C VARIANT_3: rp[i] = ~(up[i] LOGOP vp[i]) for each limb i.
C Same structure as the plain variant, except that every result is
C complemented with notq after LOGOP and before it is stored.
C Registers: r8 and r9 hold vp limbs and then results, eax holds n mod 4,
C and rp, up, vp, n are modified in place.
171ifdef(`VARIANT_3',`
172	TEXT
173	ALIGN(32)
174PROLOGUE(func)
C Preload vp limb 0 into r8.  This load happens before n is examined and
C there is no n = 0 check anywhere below, so at least one limb is assumed.
175	movq	(vp), %r8
C eax = low 32 bits of n; reduced to n mod 4 by the andl below.
176	movl	%ecx, %eax
C Advance all three pointers to one past their last limb, then negate n.
C From here on (ptr,n,8) addresses limb 0 and n counts upwards to zero.
177	leaq	(vp,n,8), vp
178	leaq	(up,n,8), up
179	leaq	(rp,n,8), rp
180	negq	n
C Dispatch on n mod 4:
C   0 -> b00, 1 -> b01 (cmpl sets carry), 2 -> b10, 3 -> b11 (fall through).
181	andl	$3, %eax
182	je	L(b00)
183	cmpl	$2, %eax
184	jc	L(b01)
185	je	L(b10)
186
C n mod 4 = 3: finish limb 0 from the preloaded r8, then bias n by -1 so
C that the second half of the loop body (offsets 16 and 24) covers limbs
C 1 and 2.
187L(b11):	LOGOP	(up,n,8), %r8
188	notq	%r8
189	movq	%r8, (rp,n,8)
190	decq	n
191	jmp	L(e11)
C n mod 4 = 2: bias n by -2 so that offsets 16 and 24 address limbs 0 and 1.
C Entering at e10 skips the r8 load, because r8 already holds vp limb 0.
192L(b10):	addq	$-2, n
193	jmp	L(e10)
C Ten one-byte NOPs (0x90).  They follow an unconditional jmp and sit
C before a label, so they are never reached by fall-through.
C NOTE(review): presumably padding that places the code below at a chosen
C alignment -- confirm before changing the count or the code above.
194	.byte	0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
C n mod 4 = 1: finish limb 0 from the preloaded r8.  If n reaches zero that
C was the only limb; otherwise fall into the loop with a multiple of 4 left.
195L(b01):	LOGOP	(up,n,8), %r8
196	notq	%r8
197	movq	%r8, (rp,n,8)
198	incq	n
199	jz	L(ret)
200
C Main loop, four limbs per iteration, two at a time through r8 and r9.
C Entry at b00 skips the first r8 load (r8 was preloaded in the prologue).
201L(oop):	movq	(vp,n,8), %r8
202L(b00):	movq	8(vp,n,8), %r9
203	LOGOP	(up,n,8), %r8
204	notq	%r8
205	LOGOP	8(up,n,8), %r9
206	notq	%r9
207	movq	%r8, (rp,n,8)
208	movq	%r9, 8(rp,n,8)
C Second half of the loop body; also the entry point for the b11 (e11) and
C b10 (e10) cases.
209L(e11):	movq	16(vp,n,8), %r8
210L(e10):	movq	24(vp,n,8), %r9
211	LOGOP	16(up,n,8), %r8
212	notq	%r8
213	LOGOP	24(up,n,8), %r9
214	notq	%r9
215	movq	%r8, 16(rp,n,8)
216	movq	%r9, 24(rp,n,8)
C n is negative while limbs remain.  The add produces a carry only when n
C wraps up to zero, so jnc keeps looping until every limb has been written.
217	addq	$4, n
218	jnc	L(oop)
219L(ret):	ret
220EPILOGUE()
221')
222