1dnl  PowerPC-64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
2
3dnl  Copyright 2003, 2005 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C		cycles/limb
23C POWER3/PPC630:     2		(1.5 c/l should be possible)
24C POWER4/PPC970:     4		(2.0 c/l should be possible)
25
26C INPUT PARAMETERS
27C rp	r3
28C up	r4
29C vp	r5
30C n	r6
31
C Register aliases used by the routine below.
C rp, up and vp name the three incoming pointer arguments.
32define(`rp',`r3')
33define(`up',`r4')
34define(`vp',`r5')
35
C s0 and s1 hold result limbs that are already shifted right but still wait
C for their top bit.  s0 shares r6 with the incoming count n, which the
C routine copies to CTR before s0 is first written.
36define(`s0',`r6')
37define(`s1',`r7')
C x receives each freshly computed difference limb.
38define(`x',`r0')
C u0 and u1 hold limbs loaded from up, v0 and v1 hold limbs loaded from vp.
39define(`u0',`r8')
40define(`u1',`r9')
41define(`v0',`r10')
42define(`v1',`r11')
43
44
45ASM_START()
C mpn_rsh1sub_n: rp[] = (up[] - vp[]) >> 1 over n limbs.
C
C Returns in r3 the low bit of up[0] - vp[0], which is the bit shifted out.
C The final borrow of the subtraction becomes the most significant bit of
C the top result limb.  Limb 0 is loaded and subtracted before the count is
C tested, so n must be at least 1.
C
C The loop is software pipelined and unrolled twice.  Each difference limb
C is shifted right into s0 or s1, and that limb is stored only after the low
C bit of the next difference has been inserted as its top bit.  The borrow
C lives in CA from one subfe to the next.  The loads, shifts, inserts, stores
C and CTR branches in between leave CA untouched, so their order relative to
C the subfe chain must be kept.
46PROLOGUE(mpn_rsh1sub_n)
47	mtctr	r6		C CTR = n, r6 is free for reuse as s0 afterwards
48	addi	rp, rp, -8	C bias rp so the stores can use offsets 8 and 16
49
50	ld	u1, 0(up)
51	ld	v1, 0(vp)
52	subfc	x, v1, u1	C x = up[0] - vp[0], starts the borrow chain in CA
53	rldicl	r12, x, 0, 63	C return value: low bit of the first difference
54	srdi	s1, x, 1	C limb 0 shifted right, top bit is filled in later
55
56	bdz	L(1)		C n = 1: only the borrow bit is still missing
57
58	ld	u0, 8(up)
59	ld	v0, 8(vp)
60
61	bdz	L(end)		C n = 2: skip the loop, finish with the u0 v0 pair
62
63L(oop):	ldu	u1, 16(up)	C step up by two limbs and load the next limb
64	ldu	v1, 16(vp)	C same for vp
65	subfe	x, v0, u0	C x = u0 - v0 - borrow
66	srdi	s0, x, 1
67	rldimi	s1, x, 63, 0	C low bit of x becomes the top bit of s1
68	std	s1, 8(rp)	C s1 is now complete, store it
69
70	bdz	L(exit)		C nothing more to load: finish with the u1 v1 pair
71
72	ld	u0, 8(up)
73	ld	v0, 8(vp)
74	subfe	x, v1, u1	C x = u1 - v1 - borrow
75	srdi	s1, x, 1
76	rldimi	s0, x, 63, 0	C low bit of x becomes the top bit of s0
77	stdu	s0, 16(rp)	C store s0 and step rp by two limbs
78
79	bdnz	L(oop)
80
C Tail when the last loaded pair is u0 v0 and s1 is the pending limb.
81L(end):	subfe	x, v0, u0
82	srdi	s0, x, 1
83	rldimi	s1, x, 63, 0
84	std	s1, 8(rp)
85
86	subfe	x, x, x		C x = CA - 1: zero if no borrow, all ones if borrow
87	rldimi	s0, x, 63, 0	C final borrow becomes the top bit of the last limb
88	std	s0, 16(rp)
89	mr	r3, r12		C return the bit shifted out of limb 0
90	blr
91
C Tail when the last loaded pair is u1 v1 and s0 is the pending limb.
92L(exit):	subfe	x, v1, u1
93	srdi	s1, x, 1
94	rldimi	s0, x, 63, 0
95	stdu	s0, 16(rp)
96
C Common finish for the n = 1 case and for L(exit): s1 is the last limb.
97L(1):	subfe	x, x, x		C x = CA - 1: zero if no borrow, all ones if borrow
98	rldimi	s1, x, 63, 0	C final borrow becomes the top bit of the last limb
99	std	s1, 8(rp)
100	mr	r3, r12		C return the bit shifted out of limb 0
101	blr
102EPILOGUE()
103