dnl  SPARC v9 mpn_rshift

dnl  Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


include(`../config.m4')

C		   cycles/limb
C UltraSPARC 1&2:     2
C UltraSPARC 3:	      3.25

C INPUT PARAMETERS
C   rp   pointer to the result limbs (written with stx)
C   up   pointer to the source limbs (read with ldx)
C   n    number of limbs, also used as the loop counter
C   cnt  shift count in bits
define(`rp',`%i0')
define(`up',`%i1')
define(`n',`%i2')
define(`cnt',`%i3')

C Source limbs in flight.  The unrolled loop keeps four of them loaded at a
C time, the one-limb-at-a-time code only uses u3.
define(`u0',`%l0')
define(`u1',`%l2')
define(`u2',`%l4')
define(`u3',`%l6')

C Negated shift count.  The 64-bit shift instructions only look at the low
C 6 bits of the count, so shifting by tnc is the same as shifting by 64-cnt.
define(`tnc',`%i4')

C Floating point instructions used purely as filler (delay slots and
C instruction grouping); their results are never read.
define(`fanop',`fitod %f0,%f2')		dnl  A quasi nop running in the FA pipe
define(`fmnop',`fmuld %f0,%f0,%f4')	dnl  A quasi nop running in the FM pipe

C mpn_rshift uses %g2 and %g3 as temporaries, so both are declared as
C scratch registers.  (ASM_START and REGISTER come from config.m4; REGISTER
C presumably expands to the assembler register declaration -- confirm there.)
ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
C mpn_rshift -- shift the n-limb number at up right by cnt bits and store
C the n-limb result at rp.
C
C In terms of 64-bit limbs the code computes
C	rp[i]   = (up[i] >> cnt) | (up[i+1] << (64-cnt))	for 0 <= i < n-1
C	rp[n-1] =  up[n-1] >> cnt
C and returns up[0] << (64-cnt), that is the bits shifted out at the low
C end, left-justified in the return limb.
C
C Requirements that follow from the code:
C   n >= 1	up[0] is always loaded and one limb is always stored.
C   cnt != 0	the left shifts use tnc = -cnt, of which sllx takes the low
C		6 bits (64-cnt); cnt = 0 would make them shifts by 0.
C		(Presumably 1 <= cnt <= 63 is the documented range -- confirm.)
C
C Structure:
C   n <= 4	everything after the first limb is done one limb at a time
C		in .Loop0.
C   n >= 5	four more limbs are preloaded.  .Loop (entered when n >= 9)
C		shifts and stores four limbs per iteration while loading the
C		next four; .Lend5678 drains the four limbs still in flight
C		and .Loop0 handles what is left.
C
C Register use:
C   %g3, %g1	alternately hold the right-shifted part of the previous limb
C   %g2		left-shifted part of the current limb
C   %i5		function result
C
C All loop-control branches test %xcc: n is a 64-bit count (assuming the
C 64-bit ABI this ldx/stx code is written for), and %icc would only reflect
C its low 32 bits.
PROLOGUE(mpn_rshift)
	save	%sp,-160,%sp		C new register window, 160 byte frame

	sub	%g0,cnt,tnc		C negate shift count
	ldx	[up],u3			C load first limb
	subcc	n,5,n			C negative if n <= 4
	sllx	u3,tnc,%i5		C compute function result
	srlx	u3,cnt,%g3		C low part of rp[0]
	bl,pn	%xcc,.Lend1234
	fanop				C delay slot filler

	subcc	n,4,n			C n = (original n) - 9
	ldx	[up+8],u0		C preload the next four limbs
	ldx	[up+16],u1
	add	up,32,up
	ldx	[up-8],u2
	ldx	[up+0],u3
	sllx	u0,tnc,%g2

	bl,pn	%xcc,.Lend5678		C n <= 8: only drain the preloaded limbs
	fanop

	b,a	.Loop			C annulling branch over the alignment padding
	.align	16
C Main loop, four limbs per iteration.  The condition codes set by the subcc
C in the second group are consumed by the bge at the bottom; nothing in
C between modifies them.  The "C --" lines separate groups of four
C instructions (presumably the intended issue groups).
.Loop:
	srlx	u0,cnt,%g1
	or	%g3,%g2,%g3
	ldx	[up+8],u0		C u0 for the next iteration
	fanop
C --
	sllx	u1,tnc,%g2
	subcc	n,4,n
	stx	%g3,[rp+0]
	fanop
C --
	srlx	u1,cnt,%g3
	or	%g1,%g2,%g1
	ldx	[up+16],u1		C u1 for the next iteration
	fanop
C --
	sllx	u2,tnc,%g2
	stx	%g1,[rp+8]
	add	up,32,up
	fanop
C --
	srlx	u2,cnt,%g1
	or	%g3,%g2,%g3
	ldx	[up-8],u2		C u2 for the next iteration
	fanop
C --
	sllx	u3,tnc,%g2
	stx	%g3,[rp+16]
	add	rp,32,rp
	fanop
C --
	srlx	u3,cnt,%g3
	or	%g1,%g2,%g1
	ldx	[up+0],u3		C u3 for the next iteration
	fanop
C --
	sllx	u0,tnc,%g2
	stx	%g1,[rp-8]
	bge,pt	%xcc,.Loop
	fanop
C --
C Drain: same computation as one loop iteration, without the loads.
.Lend5678:
	srlx	u0,cnt,%g1
	or	%g3,%g2,%g3
	sllx	u1,tnc,%g2
	stx	%g3,[rp+0]
	srlx	u1,cnt,%g3
	or	%g1,%g2,%g1
	sllx	u2,tnc,%g2
	stx	%g1,[rp+8]
	srlx	u2,cnt,%g1
	or	%g3,%g2,%g3
	sllx	u3,tnc,%g2
	stx	%g3,[rp+16]
	add	rp,32,rp
	srlx	u3,cnt,%g3		C carry...
	or	%g1,%g2,%g1
	stx	%g1,[rp-8]

C Here %g3 holds the right-shifted part of the last limb loaded so far.
.Lend1234:
	addcc	n,4,n			C n = number of limbs not yet loaded
	bz,pn	%xcc,.Lret
	fanop
C One limb per iteration.
.Loop0:
	add	rp,8,rp
	subcc	n,1,n
	ldx	[up+8],u3
	add	up,8,up
	sllx	u3,tnc,%g2
	or	%g3,%g2,%g3
	stx	%g3,[rp-8]
	srlx	u3,cnt,%g3
	bnz,pt	%xcc,.Loop0
	fanop
.Lret:
	stx	%g3,[rp+0]		C most significant result limb
	mov	%i5,%i0			C function result
	ret
	restore				C executes in the delay slot of ret
EPILOGUE(mpn_rshift)