dnl Alpha mpn_mod_34lsub1.

dnl  Copyright 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     4 (?)
C EV5:     2.67
C EV6:     1.67


dnl  mpn_mod_34lsub1 -- fold the n 64-bit limbs at up into one 64-bit value
dnl  that is congruent to the whole operand modulo 2^48-1.
dnl
dnl  The limb at the lowest address is treated as the least significant.
dnl  Limbs are summed into three accumulators selected by limb index mod 3,
dnl  and the carries out of each accumulator are counted separately.
dnl
dnl  Since 2^48 = 1 mod 2^48-1, the limb at index i has weight
dnl  2^(64*i mod 48), which is 2^0, 2^16 or 2^32 for i mod 3 = 0, 1, 2.  A carry
dnl  out of an accumulator is worth 2^64 times the weight of that accumulator,
dnl  so the carry counters c0, c1, c2 have weights 2^16, 2^32 and 2^0.
dnl
dnl  The final block moves every term to its weight inside 48 bits and adds
dnl  them.  The result is only partly reduced: it is a sum of several terms
dnl  that are each below 2^48, so it can be larger than 2^48-1.
dnl
dnl  INPUT PARAMETERS
dnl  up		r16
dnl  n		r17
dnl
dnl  RESULT
dnl  r0
dnl
dnl  Registers written: r0-r6 and r16-r25.  No memory is written and the
dnl  stack is not used.
dnl
dnl  Loop bookkeeping: after the first adjustment r17 always holds
dnl  (number of limbs not yet loaded) - 3, so r17 >= 0 means a further full
dnl  group of three limbs can be loaded.

C Most recently loaded limb of each residue class
define(`l0',`r18')
define(`l1',`r19')
define(`l2',`r20')
C Running 64-bit sums of each residue class
define(`a0',`r21')
define(`a1',`r22')
define(`a2',`r23')
C Count of carries out of a0, a1, a2
define(`c0',`r24')
define(`c1',`r5')
define(`c2',`r6')

ASM_START()
PROLOGUE(mpn_mod_34lsub1)
C Clear the carry counters
	bis	r31, r31, c0
	bis	r31, r31, c1
	bis	r31, r31, c2

C Fewer than 3 limbs: clear the sums too and go straight to the tail code
	lda	r17, -3(r17)
	bge	r17, $L_3_or_more
	bis	r31, r31, a0
	bis	r31, r31, a1
	bis	r31, r31, a2
	br	r31, $L_012

C The first group of three limbs initialises the sums directly
$L_3_or_more:
	ldq	a0, 0(r16)
	ldq	a1, 8(r16)
	ldq	a2, 16(r16)
	lda	r16, 24(r16)
	lda	r17, -3(r17)
	blt	r17, $L_012

C Prime the pipeline: load the second group and start the a0 addition, so
C that the loop and the wind-down code both begin with the a0 carry test
$L_6_or_more:
	ldq	l0, 0(r16)
	ldq	l1, 8(r16)
	ldq	l2, 16(r16)
	addq	l0, a0, a0

	lda	r16, 24(r16)
	lda	r17, -3(r17)
	blt	r17, $L_end

	ALIGN(16)
C Main loop
C
C Each iteration finishes accumulating the group already held in l0, l1, l2
C while loading the next group.  Every cmpult must read the old limb value,
C so it is placed ahead of the ldq that overwrites that limb register.
$L_9_or_more:
$Loop:	cmpult	a0, l0, r0		C carry out of the pending a0 += l0
	ldq	l0, 0(r16)
	addq	r0, c0, c0
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	ldq	l1, 8(r16)
	addq	r0, c1, c1
	addq	l2, a2, a2
	cmpult	a2, l2, r0
	ldq	l2, 16(r16)
	addq	r0, c2, c2
	addq	l0, a0, a0		C start the next group, carry taken later
	lda	r16, 24(r16)
	lda	r17, -3(r17)
	bge	r17, $Loop

C Wind down: accumulate the last loaded group without loading another
$L_end:	cmpult	a0, l0, r0
	addq	r0, c0, c0
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	addq	r0, c1, c1
	addq	l2, a2, a2
	cmpult	a2, l2, r0
	addq	r0, c2, c2

C Handle the last (n mod 3) limbs
C
C On entry r17 is -3, -2 or -1.  After adding 2 it is (limbs left) - 1, so
C negative means none left, zero means one left, positive means two left.
$L_012:	lda	r17, 2(r17)
	blt	r17, $L_0
	ldq	l0, 0(r16)
	addq	l0, a0, a0
	cmpult	a0, l0, r0
	addq	r0, c0, c0
	beq	r17, $L_0
	ldq	l1, 8(r16)
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	addq	r0, c1, c1

C Align and sum our 3 main accumulators and 3 carry accumulators
C
C Every quantity is split at the bit that lands on bit 48 once it has been
C moved to its weight.  The low part is shifted up to the weight and the high
C part wraps round to bit 0.  Terms, in the order they are formed:
C	r2 = a0 >> 48		r0 = a0 & (2^48-1)
C	r4 = a1 >> 32		r1 = (a1 & (2^32-1)) << 16
C	a1 = a2 >> 16		r3 = (a2 & (2^16-1)) << 32
C	a2 = c0 >> 32		r4 = (c0 & (2^32-1)) << 16
C	c0 = c1 >> 16		r2 = (c1 & (2^16-1)) << 32
C	r1 = c2 & (2^48-1)
C a1, a2 and c0 are reused as scratch once their own value has been consumed.
C When HAVE_LIMB_LITTLE_ENDIAN is not defined the masked and shifted terms
C are built with zapnot followed by sll, using r25 as a temporary.
$L_0:	srl	a0, 48, r2
	srl	a1, 32, r4
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	insll	a1, 2, r1',		C (a1 & 0xffffffff) << 16
`	zapnot	a1, 15, r25
	sll	r25, 16, r1')
	zapnot	a0, 63, r0		C a0 & 0xffffffffffff
	srl	a2, 16, a1
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	inswl	a2, 4, r3',		C (a2 & 0xffff) << 32
`	zapnot	a2, 3, r25
	sll	r25, 32, r3')
	addq	r1, r4, r1
	addq	r0, r2, r0
	srl	c0, 32, a2
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	insll	c0, 2, r4',		C (c0 & 0xffffffff) << 16
`	zapnot	c0, 15, r25
	sll	r25, 16, r4')
	addq	r0, r1, r0
	addq	r3, a1, r3
	addq	r0, r3, r0
	srl	c1, 16, c0
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	inswl	c1, 4, r2',		C (c1 & 0xffff) << 32
`	zapnot	c1, 3, r25
	sll	r25, 32, r2')
	addq	r4, a2, r4
C c2 grows by at most one per group of three limbs, so bits 48 and up stay
C clear unless the operand has on the order of 2^48 groups.  The two
C instructions that would fold those bits in are left commented out.
C	srl	c2, 48, r3		C This will be 0 in practice
	zapnot	c2, 63, r1		C r1 = c2 & 0xffffffffffff
	addq	r0, r4, r0
	addq	r2, c0, r2
	addq	r0, r2, r0
C	addq	r1, r3, r1
	addq	r0, r1, r0

	ret	r31, (r26), 1
EPILOGUE(mpn_mod_34lsub1)
ASM_END()
