logops_n.asm revision 1.1.1.3
1dnl  PowerPC-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
2dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
3
4dnl  Copyright 2003-2005 Free Software Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of either:
10dnl
11dnl    * the GNU Lesser General Public License as published by the Free
12dnl      Software Foundation; either version 3 of the License, or (at your
13dnl      option) any later version.
14dnl
15dnl  or
16dnl
17dnl    * the GNU General Public License as published by the Free Software
18dnl      Foundation; either version 2 of the License, or (at your option) any
19dnl      later version.
20dnl
21dnl  or both in parallel, as here.
22dnl
23dnl  The GNU MP Library is distributed in the hope that it will be useful, but
24dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
25dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
26dnl  for more details.
27dnl
28dnl  You should have received copies of the GNU General Public License and the
29dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
30dnl  see https://www.gnu.org/licenses/.
31
32include(`../config.m4')
33
34C                  cycles/limb
35C POWER3/PPC630          1.75
36C POWER4/PPC970          2.10
37C POWER5                 ?
38C POWER6                 ?
39C POWER7                 1.75
40
41C   n	   POWER3/PPC630   POWER4/PPC970
42C     1	       15.00	       15.33
43C     2		7.50		7.99
44C     3		5.33		6.00
45C     4		4.50		4.74
46C     5		4.20		4.39
47C     6		3.50		3.99
48C     7		3.14		3.64
49C     8		3.00		3.36
50C     9		3.00		3.36
51C    10		2.70		3.25
52C    11		2.63		3.11
53C    12		2.58		3.00
54C    13		2.61		3.02
55C    14		2.42		2.82
56C    15		2.40		2.79
57C    50		2.08		2.67
58C   100		1.85		2.31
59C   200		1.80		2.18
60C   400		1.77		2.14
61C  1000		1.76		2.10#
62C  2000		1.75#		2.13
63C  4000		2.30		2.57
64C  8000		2.62		2.58
65C 16000		2.52		4.25
66C 32000		2.49	       16.25
67C 64000		2.66	       18.76
68
C Operation selection.  Each ifdef below tests one OPERATION_* symbol and,
C when that symbol is defined, defines two m4 macros used by the code further
C down:
C   func   the entry point name passed to PROLOGUE
C   logop  the PowerPC register-register logical instruction emitted for
C          every limb
C The pairs set up here are: and_n -> and, andn_n -> andc, nand_n -> nand,
C ior_n -> or, iorn_n -> orc, nior_n -> nor, xor_n -> xor, xnor_n -> eqv.
C If no OPERATION_* symbol is defined, func and logop are left undefined by
C this table.  NOTE(review): presumably the build defines exactly one of these
C symbols per object file -- confirm against the build machinery.
69ifdef(`OPERATION_and_n',
70`	define(`func',`mpn_and_n')
71	define(`logop',		`and')')
72ifdef(`OPERATION_andn_n',
73`	define(`func',`mpn_andn_n')
74	define(`logop',		`andc')')
75ifdef(`OPERATION_nand_n',
76`	define(`func',`mpn_nand_n')
77	define(`logop',		`nand')')
78ifdef(`OPERATION_ior_n',
79`	define(`func',`mpn_ior_n')
80	define(`logop',		`or')')
81ifdef(`OPERATION_iorn_n',
82`	define(`func',`mpn_iorn_n')
83	define(`logop',		`orc')')
84ifdef(`OPERATION_nior_n',
85`	define(`func',`mpn_nior_n')
86	define(`logop',		`nor')')
87ifdef(`OPERATION_xor_n',
88`	define(`func',`mpn_xor_n')
89	define(`logop',		`xor')')
90ifdef(`OPERATION_xnor_n',
91`	define(`func',`mpn_xnor_n')
92	define(`logop',		`eqv')')
93
94C INPUT PARAMETERS
95C rp	r3
96C up	r4
97C vp	r5
98C n	r6
99
100MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
101
102ASM_START()
C func (rp = r3, up = r4, vp = r5, n = r6)
C
C For i = 0 .. n-1 store  logop(up[i], vp[i])  to rp[i], one 64-bit limb at a
C time.  The u limb is always the first source operand of logop and the v limb
C the second.
C
C n must be at least 1: limb 0 is loaded unconditionally and a count of zero
C is never tested for (CTR would start at 0 and the bdnz would wrap).
C
C Structure: a 4-way unrolled, software pipelined loop.  Each stage loads the
C next u,v pair while combining and storing the previously loaded pair.  Two
C register pairs alternate as operands: (r6,r7) and (r8,r9).  r10 holds the
C result limb, r0 the initial byte offset, CTR the loop count.
C
C The loop is entered at one of four labels chosen from n mod 4, so that the
C remaining stages plus whole iterations cover exactly n limbs:
C   n = 4t + 1  ->  L01   (no stage before the first pointer bump)
C   n = 4t + 2  ->  L10   (1 stage)
C   n = 4t + 3  ->  L11   (2 stages)
C   n = 4t + 4  ->  L00   (3 stages)
C All three pointers are pre-offset by 8 * ((n-1) & 3) bytes so the fixed
C negative displacements in the loop body address the right limbs for every
C entry point.  The last limb is always combined and stored after the loop.
103PROLOGUE(func)
104	ld	r8, 0(r4)	C read lowest u limb
105	ld	r9, 0(r5)	C read lowest v limb
106	addi	r6, r6, 3	C compute branch count (1): r6 = n + 3
107	rldic.	r0, r6, 3, 59	C r0 = (n-1 & 3) << 3; cr0 eq iff n = 4t + 1 (n + 3 = 4(t+1))
108	cmpldi	cr6, r0, 16	C cr6 = (n cmp 4t + 3): lt for 4t + 2, eq for 4t + 3
109
C CTR = (n + 3) >> 2, i.e. ceil(n/4): the number of times the bdnz below is
C executed.  The mode32 variant starts its mask at bit 34 instead of bit 2, so
C it also discards the upper 32 bits of r6.  NOTE(review): presumably n is
C only 32 bits wide under that ABI -- confirm.
110ifdef(`HAVE_ABI_mode32',
111`	rldicl	r6, r6, 62,34',	C ...branch count
112`	rldicl	r6, r6, 62, 2')	C ...branch count
113	mtctr	r6
114
C Limb 0 is now held in both register pairs, because the entry points differ
C in which pair they consume first: L00 and L10 use (r8,r9), L11 and L01 use
C (r6,r7).  r6 is free to be overwritten here since the count is in CTR.
115	ld	r6, 0(r4)	C read lowest u limb (again)
116	ld	r7, 0(r5)	C read lowest v limb (again)
117
118	add	r5, r5, r0	C offset vp
119	add	r4, r4, r0	C offset up
120	add	r3, r3, r0	C offset rp
121
C Dispatch on n mod 4 using the condition fields set above (the add
C instructions in between do not modify them).
122	beq	cr0, L(L01)	C n = 4t + 1
123	blt	cr6, L(L10)	C n = 4t + 2
124	beq	cr6, L(L11)	C n = 4t + 3
125	b	L(L00)		C n = 4t + 4
126
C Loop body: four load/op/store stages, then a 32 byte (4 limb) pointer bump.
127L(oop):	ld	r8, -24(r4)
128	ld	r9, -24(r5)
129	logop	r10, r6, r7	C pair loaded by the L10 stage or before entry
130	std	r10, -32(r3)
131L(L00):	ld	r6, -16(r4)
132	ld	r7, -16(r5)
133	logop	r10, r8, r9
134	std	r10, -24(r3)
135L(L11):	ld	r8, -8(r4)
136	ld	r9, -8(r5)
137	logop	r10, r6, r7
138	std	r10, -16(r3)
139L(L10):	ld	r6, 0(r4)
140	ld	r7, 0(r5)
141	logop	r10, r8, r9
142	std	r10, -8(r3)
143L(L01):	addi	r5, r5, 32
144	addi	r4, r4, 32
145	addi	r3, r3, 32
146	bdnz	L(oop)		C decrement CTR, loop while it is nonzero
147
C Drain the pipeline: (r6,r7) holds the final u,v pair.  rp has already been
C advanced by 32, hence the -32 displacement.
148	logop	r10, r6, r7
149	std	r10, -32(r3)
150	blr
151EPILOGUE()
152