1dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
2
3dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33C		    cycles/limb
34C POWER3/PPC630		 ?
35C POWER4/PPC970		 ?
36C POWER5		 2
37C POWER6		 3.5  (mysteriously 3.0 for cnt=1)
38
39C TODO
40C  * Micro-optimise header code
41C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4248
42C    bytes, 4-way code would become about 50% larger.
43
44C INPUT PARAMETERS
45define(`rp_param',  `r3')	C incoming destination pointer; shares r3 with retval
46define(`up',  `r4')		C source pointer, advanced as limbs are consumed
47define(`n',   `r5')		C number of 64-bit limbs to shift
48define(`cnt', `r6')		C shift count in bits
49
C Working registers.
50define(`tnc',`r0')		C 64 - cnt, computed with subfic at function entry
51define(`retval',`r3')	C result register; the visible code names r3 directly instead
52define(`rp',  `r7')		C working destination pointer, copied from rp_param at entry
53
C mpn_rshift: shift the n-limb operand at up right by cnt bits and store n
C limbs at rp_param.  Result limb i is (up[i] >> cnt) | (up[i+1] << (64-cnt));
C the top limb is just up[n-1] >> cnt.  r3 returns up[0] << (64-cnt), i.e. the
C bits shifted out at the low end.
C
C The shifting is done by one of the 63 code blocks generated by SHIFT, each
C specialised for one immediate count and 64 bytes long.  This header works
C out the entry address for the given cnt and the parity of n, then jumps
C there through the link register.  The real return address is kept in r12
C until the exit paths put it back.
C
C Assumes n >= 1 and 1 <= cnt <= 63; nothing here checks either -- TODO
C confirm that callers guarantee it.
C
C Registers written: r0 (tnc), r3-r12, ctr, lr, cr0.
54ASM_START()
55PROLOGUE(mpn_rshift,toc)
56
C In the 32-bit ABI variant n is first zero-extended from 32 to 64 bits.
57ifdef(`HAVE_ABI_mode32',`
58	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
59')
60	mflr	r12			C save return address; lr is reused for the computed jump
61	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
62	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block (64 bytes)
63	add	r11, r11, r10		C r11 = L(e1) + 64*cnt; corrected to L(oN) or L(eN) below
64	srdi	r10, n, 1		C r10 = n / 2, the loop count
65	mr	rp, rp_param		C free r3, which will carry the return value
66	subfic	tnc, cnt, 64		C tnc = 64 - cnt
67	rlwinm.	r8, n, 0,31,31		C extract bit 0
68	mtctr	r10
69	beq	L(evn)			C bit 0 clear: n is even
70
C n is odd.  Preload up[0] into r9 and up[1] into r8, then enter the block at
C L(oN).  up and rp are biased by one limb because L(oN) addresses memory as
C 8(up) and -8(rp).
71L(odd):	ld	r9, 0(up)
72	cmpdi	cr0, n, 1		C n = 1?
73	beq	L(1)
74	ld	r8, 8(up)
75	addi	r11, r11, -84		C L(o1) - L(e1) - 64, i.e. -20 - 64
76	mtlr	r11
77	sld	r3, r9, tnc		C retval
78	addi	up, up, 8
79	addi	rp, rp, 8
80	blr				C branch to L(oN)
81
C n is even.  Preload up[0] into r8 and up[1] into r9, then enter at L(eN).
82L(evn):	ld	r8, 0(up)
83	ld	r9, 8(up)
84	addi	r11, r11, -64		C step back one block: r11 = L(eN)
85	mtlr	r11
86	sld	r3, r8, tnc		C retval
87	addi	up, up, 16
88	blr				C branch to L(eN)
89
C n = 1: a single limb with no neighbour to merge in; r9 already holds up[0].
90L(1):	sld	r3, r9, tnc		C retval
91	srd	r8, r9, cnt
92	std	r8, 0(rp)
93	mtlr	r12			C restore the real return address
C 32-bit ABI variant: r4 gets a copy of the result and r3 its upper 32 bits.
94ifdef(`HAVE_ABI_mode32',
95`	mr	r4, r3
96	srdi	r3, r3, 32
97')
98	blr
99
100
C SHIFT(N): the shift code for cnt = N, with three labels:
C   L(loN)  top of the loop, which produces two limbs per pass
C   L(oN)   entry for odd n   (r9 = lower limb, r8 = the limb above it)
C   L(eN)   entry for even n  (r8 = lower limb, r9 = the limb above it)
C Each expansion is 16 instructions, i.e. 64 bytes.  The two trailing nops
C pad it to that size, and the dispatch code depends on it when it scales
C cnt by 64 and applies the fixed -84 and -64 corrections.  Do not add or
C remove instructions without updating those constants.
C On exit r10 holds the top result limb, which L(com) stores at 8(rp).
101define(SHIFT,`
102L(lo$1):ld	r8, 0(up)		C fetch the next limb
103	std	r11, 0(rp)		C store the limb computed at the end of the previous pass
104	addi	rp, rp, 16
105L(o$1):	srdi	r10, r9, $1		C r10 = r9 >> cnt
106	rldimi	r10, r8, eval(64-$1), 0	C top cnt bits of r10 = low cnt bits of r8
107	ld	r9, 8(up)
108	addi	up, up, 16
109	std	r10, -8(rp)
110L(e$1):	srdi	r11, r8, $1		C r11 = r8 >> cnt
111	rldimi	r11, r9, eval(64-$1), 0	C top cnt bits of r11 = low cnt bits of r9
112	bdnz	L(lo$1)			C decrement ctr, loop again while it is nonzero
113	std	r11, 0(rp)
114	srdi	r10, r9, $1		C top result limb, nothing above it to merge in
115	b	L(com)
116	nop				C padding up to 64 bytes
117	nop
118')
119
C Emit SHIFT(1) through SHIFT(63) back to back.  Block N starts 64*(N-1)
C bytes after block 1, which is what the address arithmetic in the function
C header assumes.  ALIGN(64) presumably places block 1 on a 64-byte boundary
C so that every block is aligned to its own size -- confirm against the
C ALIGN definition.
120	ALIGN(64)
121forloop(`i',1,63,`SHIFT(i)')
122
C Common exit for every SHIFT block: store the top limb left in r10, put back
C the return address saved in r12 and return with the result in r3.
123L(com):	std	r10, 8(rp)
124	mtlr	r12
C 32-bit ABI variant: r4 gets a copy of the result and r3 its upper 32 bits.
125ifdef(`HAVE_ABI_mode32',
126`	mr	r4, r3
127	srdi	r3, r3, 32
128')
129	blr
130EPILOGUE()
131ASM_END()
132