dnl  Alpha mpn_submul_1 -- Multiply a limb vector with a limb and subtract
dnl  the result from a second limb vector.

dnl  Copyright 1992, 1994, 1995, 2000, 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     42
C EV5:     18
C EV6:      7

C  mpn_submul_1 -- multiply the n-limb vector at up by the single limb in
C  r19, subtract the product limb by limb from the vector at rp (updating rp
C  in place), and leave in r0 the limb that did not fit: the final high
C  product half plus the carries and borrows accumulated along the way.
C
C  INPUT PARAMETERS
C  rp	r16
C  up	r17
C  n	r18
C  limb	r19
C
C  n must be at least 1: the first limb of up and of rp is loaded before n
C  is tested.
C
C  REGISTER USE
C  r0	carry limb passed from one position to the next; result at return
C  r2	limb of up currently being multiplied (fetched one limb ahead)
C  r3	low product half, then the difference stored back to rp
C  r4	borrow from the first subtraction, afterwards the high product half
C  r5	limb read from rp, then the borrow out of the subtraction
C
C  The instruction order below is hand scheduled; keep it as is.

ASM_START()
PROLOGUE(mpn_submul_1)
C  First limb.  Both product halves are formed before n is tested, so the
C  n == 1 case only has the subtract and store left to do at $Lend1.
	ldq	r2,0(r17)	C r2 = up[0]
	addq	r17,8,r17	C up++
	subq	r18,1,r18	C n--
	mulq	r2,r19,r3	C r3 = low half of up[0] * limb
	ldq	r5,0(r16)	C r5 = rp[0]
	umulh	r2,r19,r0	C r0 = high half of up[0] * limb
	beq	r18,$Lend1	C jump if n was == 1
	ldq	r2,0(r17)	C r2 = up[1], fetched ahead of its use
	addq	r17,8,r17	C up++
	subq	r18,1,r18	C n--
	subq	r5,r3,r3	C r3 = rp[0] - low product
	cmpult	r5,r3,r4	C r4 = 1 if that subtraction borrowed
	stq	r3,0(r16)	C rp[0] = r3
	addq	r16,8,r16	C rp++
	beq	r18,$Lend2	C jump if n was == 2

	ALIGN(8)
C  Main loop.  On entry r2 holds the limb to multiply, and r0 and r4 between
C  them hold the previous high product half and the outstanding carries.
C  Each pass also fetches the limb for the following pass, so the last limb
C  is still pending in r2 when the loop exits and is handled at $Lend2.
$Loop:	mulq	r2,r19,r3	C r3 = low product half
	ldq	r5,0(r16)	C r5 = *rp
	addq	r4,r0,r0	C r0 = previous high half + carries
	subq	r18,1,r18	C n--
	umulh	r2,r19,r4	C r4 = high product half, used next pass
	ldq	r2,0(r17)	C r2 = next limb of up
	addq	r17,8,r17	C up++
	addq	r3,r0,r3	C r3 = low product half + incoming carry limb
	cmpult	r3,r0,r0	C r0 = carry out of that addition
	subq	r5,r3,r3	C r3 = *rp - r3
	cmpult	r5,r3,r5	C r5 = borrow out of that subtraction
	stq	r3,0(r16)	C *rp = r3
	addq	r16,8,r16	C rp++
	addq	r5,r0,r0	C r0 = carry + borrow
	bne	r18,$Loop

C  Last limb, already in r2.  Same step as the loop body but with no further
C  fetch from up, and the final high product half is folded into r0.
$Lend2:	mulq	r2,r19,r3	C r3 = low product half
	ldq	r5,0(r16)	C r5 = *rp
	addq	r4,r0,r0	C r0 = previous high half + carries
	umulh	r2,r19,r4	C r4 = final high product half
	addq	r3,r0,r3	C r3 = low product half + incoming carry limb
	cmpult	r3,r0,r0	C r0 = carry out of that addition
	subq	r5,r3,r3	C r3 = *rp - r3
	cmpult	r5,r3,r5	C r5 = borrow out of that subtraction
	stq	r3,0(r16)	C *rp = r3
	addq	r5,r0,r0	C r0 = carry + borrow
	addq	r4,r0,r0	C r0 = final high half + carry + borrow
	ret	r31,(r26),1
C  n == 1.  r3 and r0 already hold the two product halves and r5 holds rp[0].
$Lend1:	subq	r5,r3,r3	C r3 = rp[0] - low product half
	cmpult	r5,r3,r5	C r5 = 1 if that subtraction borrowed
	stq	r3,0(r16)	C rp[0] = r3
	addq	r0,r5,r0	C r0 = high product half + borrow
	ret	r31,(r26),1
EPILOGUE(mpn_submul_1)
ASM_END()