dnl  IBM POWER mpn_submul_1 -- Multiply a limb vector with a limb and subtract
dnl  the result from a second limb vector.

dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


dnl  INPUT PARAMETERS
dnl  res_ptr	r3
dnl  s1_ptr	r4
dnl  size	r5
dnl  s2_limb	r6

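dnl  For reference, a hedged C sketch (not part of the build; names and the
dnl  32-bit limb assumption are illustrative) of the operation performed here:
dnl  subtract {s1_ptr,size}*s2_limb from {res_ptr,size} and return the final
dnl  borrow, i.e. the high limb of the product plus propagated borrows.
dnl
dnl    #include <stdint.h>
dnl
dnl    uint32_t
dnl    ref_submul_1 (uint32_t *res_ptr, const uint32_t *s1_ptr,
dnl                  long size, uint32_t s2_limb)
dnl    {
dnl      uint32_t cy = 0;
dnl      for (long i = 0; i < size; i++)
dnl        {
dnl          uint64_t prod = (uint64_t) s1_ptr[i] * s2_limb;
dnl          uint32_t plo = (uint32_t) prod + cy;
dnl          uint32_t phi = (uint32_t) (prod >> 32) + (plo < cy);
dnl          cy = phi + (res_ptr[i] < plo);   /* borrow from the subtract */
dnl          res_ptr[i] -= plo;
dnl        }
dnl      return cy;
dnl    }
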
dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
dnl  instruction.  To obtain that operation, we have to use the 32x32->64
dnl  signed multiplication instruction, and add the appropriate compensation to
dnl  the high limb of the result.  We add the multiplicand if the multiplier
dnl  has its most significant bit set, and we add the multiplier if the
dnl  multiplicand has its most significant bit set.  We need to preserve the
dnl  carry flag between each iteration, so we have to compute the compensation
dnl  carefully (the natural srai+and sequence doesn't work).  Since all POWER
dnl  implementations can branch in zero cycles, we use conditional branches
dnl  for the compensation.

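dnl  As a hedged C sketch of that compensation (illustrative only, not part of
dnl  the build; assumes two's-complement conversions), the unsigned 32x32->64
dnl  product can be recovered from the signed one like this:
dnl
dnl    #include <stdint.h>
dnl
dnl    uint64_t
dnl    umul32 (uint32_t a, uint32_t b)   /* a = multiplicand, b = multiplier */
dnl    {
dnl      int64_t p = (int64_t) (int32_t) a * (int32_t) b;  /* signed 32x32->64 */
dnl      uint32_t hi = (uint32_t) ((uint64_t) p >> 32);
dnl      if ((int32_t) b < 0)   /* multiplier msb set: add the multiplicand */
dnl        hi += a;
dnl      if ((int32_t) a < 0)   /* multiplicand msb set: add the multiplier */
dnl        hi += b;
dnl      return ((uint64_t) hi << 32) | (uint32_t) p;
dnl    }
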
include(`../config.m4')

ASM_START()
PROLOGUE(mpn_submul_1)
	cal	3,-4(3)		C res_ptr -= 4, for update-form accesses
	l	0,0(4)		C load first s1 limb
	cmpi	0,6,0		C test sign of s2_limb
	mtctr	5		C size to count register
	mul	9,0,6		C signed product, high 32 bits
	srai	7,0,31
	and	7,7,6		C compensation: s2_limb if s1 limb is negative
	mfmq	11		C low 32 bits of product
	cax	9,9,7		C add compensation to high limb
	l	7,4(3)		C load res_limb
	sf	8,11,7		C subtract low limb from res_limb
	a	11,8,11		C invert cy (r11 is junk)
	blt	Lneg		C branch if s2_limb has its msb set
Lpos:	bdz	Lend

Lploop:	lu	0,4(4)
	stu	8,4(3)
	cmpi	0,0,0
	mul	10,0,6
	mfmq	0
	ae	11,0,9		C low limb + old_cy_limb + old cy
	l	7,4(3)
	aze	10,10		C propagate cy to new cy_limb
	sf	8,11,7		C subtract low limb from res_limb
	a	11,8,11		C invert cy (r11 is junk)
	bge	Lp0
	cax	10,10,6		C adjust high limb for negative limb from s1
Lp0:	bdz	Lend0
	lu	0,4(4)
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	0
	ae	11,0,10		C low limb + old_cy_limb + old cy
	l	7,4(3)
	aze	9,9		C propagate cy to new cy_limb
	sf	8,11,7		C subtract low limb from res_limb
	a	11,8,11		C invert cy (r11 is junk)
	bge	Lp1
	cax	9,9,6		C adjust high limb for negative limb from s1
Lp1:	bdn	Lploop

	b	Lend

Lneg:	cax	9,9,0		C s2_limb msb set: add multiplicand to high limb
	bdz	Lend
Lnloop:	lu	0,4(4)
	stu	8,4(3)
	cmpi	0,0,0
	mul	10,0,6
	mfmq	7
	ae	11,7,9		C low limb + old_cy_limb + old cy
	l	7,4(3)
	ae	10,10,0		C propagate cy to new cy_limb
	sf	8,11,7		C subtract low limb from res_limb
	a	11,8,11		C invert cy (r11 is junk)
	bge	Ln0
	cax	10,10,6		C adjust high limb for negative limb from s1
Ln0:	bdz	Lend0
	lu	0,4(4)
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	7
	ae	11,7,10		C low limb + old_cy_limb + old cy
	l	7,4(3)
	ae	9,9,0		C propagate cy to new cy_limb
	sf	8,11,7		C subtract low limb from res_limb
	a	11,8,11		C invert cy (r11 is junk)
	bge	Ln1
	cax	9,9,6		C adjust high limb for negative limb from s1
Ln1:	bdn	Lnloop
	b	Lend

Lend0:	cal	9,0(10)		C move cy_limb from r10 to r9
Lend:	st	8,4(3)		C store last res_limb
	aze	3,9		C return cy_limb plus final borrow
	br
EPILOGUE(mpn_submul_1)
