logops_n.asm revision 1.1.1.2
1dnl  PowerPC-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
3
4dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of the GNU Lesser General Public License as published
10dnl  by the Free Software Foundation; either version 3 of the License, or (at
11dnl  your option) any later version.
12
13dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16dnl  License for more details.
17
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23C                  cycles/limb
24C POWER3/PPC630          1.75
25C POWER4/PPC970          2.10
26C POWER5                 ?
27C POWER6                 ?
28C POWER7                 1.75
29
30C   n	   POWER3/PPC630   POWER4/PPC970
31C     1	       15.00	       15.33
32C     2		7.50		7.99
33C     3		5.33		6.00
34C     4		4.50		4.74
35C     5		4.20		4.39
36C     6		3.50		3.99
37C     7		3.14		3.64
38C     8		3.00		3.36
39C     9		3.00		3.36
40C    10		2.70		3.25
41C    11		2.63		3.11
42C    12		2.58		3.00
43C    13		2.61		3.02
44C    14		2.42		2.82
45C    15		2.40		2.79
46C    50		2.08		2.67
47C   100		1.85		2.31
48C   200		1.80		2.18
49C   400		1.77		2.14
50C  1000		1.76		2.10#
51C  2000		1.75#		2.13
52C  4000		2.30		2.57
53C  8000		2.62		2.58
54C 16000		2.52		4.25
55C 32000		2.49	       16.25
56C 64000		2.66	       18.76
57
C OPERATION SELECTION
C One OPERATION_xxx symbol is expected to be defined when this file is
C processed (presumably once per name in the MULFUNC_PROLOGUE list further
C down -- TODO confirm against the build machinery).  The matching branch
C defines two macros used by the code that follows:
C   func   the function name handed to PROLOGUE
C   logop  the PowerPC instruction applied to each pair of limbs
C The code always issues "logop r10, <u limb>, <v limb>", so the u limb is the
C first source operand and the v limb the second.  This matters for the
C non-commutative andc and orc, which complement their second source operand.
C If no OPERATION_xxx symbol is defined, func and logop are left undefined.
58ifdef(`OPERATION_and_n',
59`	define(`func',`mpn_and_n')
60	define(`logop',		`and')')
61ifdef(`OPERATION_andn_n',
62`	define(`func',`mpn_andn_n')
63	define(`logop',		`andc')')
64ifdef(`OPERATION_nand_n',
65`	define(`func',`mpn_nand_n')
66	define(`logop',		`nand')')
67ifdef(`OPERATION_ior_n',
68`	define(`func',`mpn_ior_n')
69	define(`logop',		`or')')
70ifdef(`OPERATION_iorn_n',
71`	define(`func',`mpn_iorn_n')
72	define(`logop',		`orc')')
73ifdef(`OPERATION_nior_n',
74`	define(`func',`mpn_nior_n')
75	define(`logop',		`nor')')
76ifdef(`OPERATION_xor_n',
77`	define(`func',`mpn_xor_n')
78	define(`logop',		`xor')')
79ifdef(`OPERATION_xnor_n',
80`	define(`func',`mpn_xnor_n')
81	define(`logop',		`eqv')')
82
83C INPUT PARAMETERS
84C rp	r3
85C up	r4
86C vp	r5
87C n	r6
C
C OVERVIEW
C Stores logop(up[i], vp[i]) to rp[i] for i = 0 .. n-1, one 64-bit limb at a
C time, lowest limb first.  The loop is unrolled four ways and software
C pipelined: every stage loads the next limb pair into one register pair while
C combining and storing the pair loaded by the previous stage.
C
C n must be at least 1: up[0] and vp[0] are loaded unconditionally, and for
C n = 0 the count register would be loaded with 0 before the bdnz.
C
C REGISTER USE
C r0		byte offset ((n-1) & 3) * 8 applied to all three pointers
C r6, r7	u/v limb pair A (r6 holds n until the mtctr)
C r8, r9	u/v limb pair B
C r10		result limb about to be stored
C ctr		(n+3) >> 2, the number of times the bdnz is executed
C cr0, cr6	select the entry point into the unrolled loop
C
C ENTRY POINT SELECTION (t >= 0)
C n = 4t+1	r0 =  0		enter at L01
C n = 4t+2	r0 =  8		enter at L10
C n = 4t+3	r0 = 16		enter at L11
C n = 4t+4	r0 = 24		enter at L00
88
89MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
90
91ASM_START()
92PROLOGUE(func)
93	ld	r8, 0(r4)	C read lowest u limb
94	ld	r9, 0(r5)	C read lowest v limb
95	addi	r6, r6, 3	C compute branch count (1)
96	rldic.	r0, r6, 3, 59	C r0 = ((n-1) & 3) << 3; cr0.eq iff n == 4t + 1
97	cmpldi	cr6, r0, 16	C cr6 = (n cmp 4t + 3)
98
C Finish the count: r6 = (n+3) >> 2.  The mode32 variant additionally clears
C the upper 32 bits of the shifted value, so only the low 32 bits of n count.
99ifdef(`HAVE_ABI_mode32',
100`	rldicl	r6, r6, 62,34',	C ...branch count
101`	rldicl	r6, r6, 62, 2')	C ...branch count
102	mtctr	r6
103
C r6 is free now that the count is in ctr.  The first limbs are kept in both
C register pairs because entries L00 and L10 consume r8/r9 first, whereas
C entries L11 and L01 consume r6/r7 first.
104	ld	r6, 0(r4)	C read lowest u limb (again)
105	ld	r7, 0(r5)	C read lowest v limb (again)
106
C Bias the pointers by r0 so that the fixed displacements used in the loop
C address the correct limbs for whichever entry point is taken.
107	add	r5, r5, r0	C offset vp
108	add	r4, r4, r0	C offset up
109	add	r3, r3, r0	C offset rp
110
111	beq	cr0, L(L01)	C r0 == 0
112	blt	cr6, L(L10)	C r0 == 8 (r0 == 0 was taken above)
113	beq	cr6, L(L11)	C r0 == 16
114	b	L(L00)		C r0 == 24
115
C Each stage: load the next pair, then combine and store the previous pair.
116L(oop):	ld	r8, -24(r4)
117	ld	r9, -24(r5)
118	logop	r10, r6, r7
119	std	r10, -32(r3)
120L(L00):	ld	r6, -16(r4)
121	ld	r7, -16(r5)
122	logop	r10, r8, r9
123	std	r10, -24(r3)
124L(L11):	ld	r8, -8(r4)
125	ld	r9, -8(r5)
126	logop	r10, r6, r7
127	std	r10, -16(r3)
128L(L10):	ld	r6, 0(r4)
129	ld	r7, 0(r5)
130	logop	r10, r8, r9
131	std	r10, -8(r3)
C Advance all three pointers by four limbs (32 bytes) and loop while ctr,
C after being decremented, is non-zero.
132L(L01):	addi	r5, r5, 32
133	addi	r4, r4, 32
134	addi	r3, r3, 32
135	bdnz	L(oop)
136
C Wind down: the last limb pair is always in r6/r7 here.  rp has already been
C advanced by 32, hence the -32 displacement.
137	logop	r10, r6, r7
138	std	r10, -32(r3)
139	blr
140EPILOGUE()
141