dnl  SPARC mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
dnl  store the difference in a third limb vector.

dnl  Copyright 2001 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


include(`../config.m4')

C INPUT PARAMETERS
C Register aliases used throughout mpn_sub_n.  The first four are the incoming
C arguments; cy is a scratch register holding the borrow that is propagated
C from one limb to the next.
define(rp,%o0)		C destination pointer (difference limbs are stored here)
define(s1p,%o1)		C first source pointer (minuend limbs)
define(s2p,%o2)		C second source pointer (subtrahend limbs)
define(n,%o3)		C number of limbs, > 0
define(cy,%g1)		C running borrow, always 0 or 1

C This code uses 64-bit operations on `o' and `g' registers.  It doesn't
C require that `o' registers' upper 32 bits are preserved by the operating
C system, but if they are not, they must be zeroed.  That is indeed what
C happens at least on Solaris 2.5 and 2.6.

C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
C about 10 cycles/limb from the Ecache.

C mpn_sub_n -- {rp,n} = {s1p,n} - {s2p,n}, returning the borrow out.
C
C In:      rp, s1p, s2p, n as defined above; n > 0.  Limbs are 32 bits wide
C          (loaded with lduw, stored with stw, 4 bytes apart).
C Out:     %o0 = borrow out of the most significant limb (0 or 1).
C Clobber: %o1-%o5, %g1-%g4, %f2, %icc.
C
C Method: each pair of limbs is zero-extended into 64-bit registers and
C subtracted with a plain 64-bit sub, then the incoming borrow is subtracted.
C Because both operands are below 2^32, the 64-bit result is negative exactly
C when the 32-bit subtraction borrows, so `srlx x,63' yields the borrow as 0
C or 1.  stw writes only the low 32 bits, i.e. the difference limb.
C
C The loop is unrolled twice and software pipelined: %o4/%o5 carry the
C even-indexed limb pair and %g2/%g3 the odd-indexed pair, each loaded one
C iteration before it is consumed.  No load touches a limb beyond index n-1.
C
C The fitod instructions write %f2, which nothing in this routine reads;
C presumably they exist only to steer instruction grouping on UltraSPARC, as
C do the `C ---' group separators -- confirm before removing or moving them.

ASM_START()
PROLOGUE(mpn_sub_n)
	lduw	[s1p+0],%o4		C %o4 = s1[0], zero-extended
	lduw	[s2p+0],%o5		C %o5 = s2[0], zero-extended
	addcc	n,-2,n			C n -= 2; %icc feeds both branches below
	bl,pn	%icc,L(end1)		C n was 1: single-limb case
	mov	0,cy			C (delay slot) no borrow into limb 0
C Limb 1 is loaded only after the n == 1 test, so a one-limb operand is never
C read past its end.  Neither mov nor lduw alters %icc.
	lduw	[s1p+4],%g2		C %g2 = s1[1]
	be,pn	%icc,L(end2)		C n was 2: two-limb case
	lduw	[s2p+4],%g3		C (delay slot) %g3 = s2[1]

C Loop invariant at L(loop): %o4/%o5 = limb pair k (k even), %g2/%g3 = limb
C pair k+1, cy = borrow into limb k, rp addresses result limb k, and n > 0 is
C the number of source limbs not yet loaded.
	.align	16
L(loop):
	sub	%o4,%o5,%g4		C s1[k] - s2[k] in 64 bits
	add	rp,8,rp			C rp += 2 limbs; stores below use rp-8, rp-4
	lduw	[s1p+8],%o4		C preload s1[k+2]
	fitod	%f0,%f2
C ---
	sub	%g4,cy,%g4		C apply incoming borrow
	addcc	n,-1,n			C account for limb k+2 just loaded
	lduw	[s2p+8],%o5		C preload s2[k+2]
	fitod	%f0,%f2
C ---
	srlx	%g4,63,cy		C borrow out of limb k
	add	s2p,8,s2p
	stw	%g4,[rp-8]		C store result limb k
	be,pn	%icc,L(exito)+4		C nothing left to load: finish k+1, k+2
C ---
C The sub below sits in the delay slot of the branch above, so it runs on both
C paths.  That is why the branch targets L(exito)+4: it skips the identical
C first instruction at L(exito).
	sub	%g2,%g3,%g4		C s1[k+1] - s2[k+1]
	addcc	n,-1,n			C account for limb k+3 loaded below
	lduw	[s1p+12],%g2		C preload s1[k+3]
	fitod	%f0,%f2
C ---
	sub	%g4,cy,%g4		C apply borrow from limb k
	add	s1p,8,s1p
	lduw	[s2p+4],%g3		C preload s2[k+3] (s2p was already advanced)
	fitod	%f0,%f2
C ---
	srlx	%g4,63,cy		C borrow out of limb k+1
	bne,pt	%icc,L(loop)		C more limbs to load: next iteration
	stw	%g4,[rp-4]		C (delay slot) store result limb k+1
C ---
C Even-count exit (fall through): two loaded pairs remain, the even one in
C %o4/%o5 and the odd one in %g2/%g3; rp addresses the first of them.
L(exite):
	sub	%o4,%o5,%g4
	sub	%g4,cy,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp+0]
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+4]		C most significant limb
	retl
	srlx	%g4,63,%o0		C (delay slot) return the final borrow

C Odd-count exit: entered at L(exito)+4 with %g4 = %g2 - %g3 already computed
C in the delay slot of the branch.  The odd pair in %g2/%g3 is finished first
C (stored at rp-4, since rp was already advanced), then the last even pair.
L(exito):
	sub	%g2,%g3,%g4		C skipped by the +4 entry; keeps the offset valid
	sub	%g4,cy,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp-4]
	sub	%o4,%o5,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+0]		C most significant limb
	retl
	srlx	%g4,63,%o0		C (delay slot) return the final borrow

C n == 1: a single limb with no incoming borrow.
L(end1):
	sub	%o4,%o5,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,63,%o0		C (delay slot) return the borrow

C n == 2: two limbs; cy is recomputed here from limb 0.
L(end2):
	sub	%o4,%o5,%g4
	srlx	%g4,63,cy
	stw	%g4,[rp+0]
	sub	%g2,%g3,%g4
	sub	%g4,cy,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,63,%o0		C (delay slot) return the final borrow
EPILOGUE(mpn_sub_n)