1dnl  IBM POWER mpn_addmul_1 -- Multiply a limb vector with a limb and add the
2dnl  result to a second limb vector.
3
4dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.
5
6dnl  This file is part of the GNU MP Library.
7
8dnl  The GNU MP Library is free software; you can redistribute it and/or modify
9dnl  it under the terms of the GNU Lesser General Public License as published
10dnl  by the Free Software Foundation; either version 3 of the License, or (at
11dnl  your option) any later version.
12
13dnl  The GNU MP Library is distributed in the hope that it will be useful, but
14dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16dnl  License for more details.
17
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21
22dnl  INPUT PARAMETERS
23dnl  res_ptr	r3
24dnl  s1_ptr	r4
25dnl  size	r5
26dnl  s2_limb	r6
27
28dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
29dnl  instruction.  To obtain that operation, we have to use the 32x32->64
30dnl  signed multiplication instruction, and add the appropriate compensation to
31dnl  the high limb of the result.  We add the multiplicand if the multiplier
32dnl  has its most significant bit set, and we add the multiplier if the
33dnl  multiplicand has its most significant bit set.  We need to preserve the
34dnl  carry flag between each iteration, so we have to compute the compensation
dnl  carefully (the natural srai+and approach doesn't work).  Since all POWER
dnl  processors can branch in zero cycles, we use conditional branches for the
dnl  compensation.
37
38include(`../config.m4')
39
40ASM_START()
PROLOGUE(mpn_addmul_1)

C  mpn_addmul_1(res_ptr, s1_ptr, size, s2_limb):
C  res_ptr[0..size-1] += s1_ptr[0..size-1] * s2_limb; return the carry limb.
C
C  Register usage:
C    r3   res_ptr (biased by -4 so the update-form l/lu/stu below work);
C         also holds the return value (cy_limb) at exit
C    r4   s1_ptr        r5  size        r6  s2_limb
C    r0   current s1 limb (also reused for the low product in the Lpos loop)
C    r8   current result limb
C    r9/r10  cy_limb (high product), alternating between the two unrolled
C            half-iterations
C    r7   scratch: sign-compensation mask, then loaded res limbs
C  The signed `mul` leaves the high product in its target register and the
C  low product in the MQ register, read back with mfmq.
C
C  The carry (CA) flag is live across each whole loop iteration, so only
C  CA-preserving instructions (cal, cax, cmpi, mul, mfmq, l/lu/stu,
C  branches) may appear between the `a`/`ae`/`aze` adds.

	cal	3,-4(3)		C bias res_ptr for the update-form accesses
	l	0,0(4)		C r0 = first s1 limb
	cmpi	0,6,0		C cr0 = sign of s2_limb; selects Lpos/Lneg loop
	mtctr	5		C loop count = size
	mul	9,0,6		C r9 = high(s1*s2) signed, MQ = low product
	srai	7,0,31		C r7 = -1 if s1 limb negative, else 0 ...
	and	7,7,6		C ... r7 = s2_limb if s1 limb negative, else 0
	mfmq	8		C r8 = low product limb
	cax	9,9,7		C unsigned fixup: add s2_limb to high limb
				C when the s1 limb was negative (CA not live
				C yet, so plain mask+add is safe here only)
	l	7,4(3)		C r7 = res limb
	a	8,8,7		C add res_limb (this establishes CA)
	blt	Lneg		C s2_limb < 0: use the compensating loop
Lpos:	bdz	Lend

C  Loop for s2_limb >= 0.  Unrolled twice; the cy_limb alternates between
C  r9 and r10 so the two halves can overlap.
Lploop:	lu	0,4(4)		C r0 = next s1 limb (s1_ptr updated)
	stu	8,4(3)		C store previous result limb (res_ptr updated)
	cmpi	0,0,0		C cr0 = sign of this s1 limb
	mul	10,0,6		C r10 = high product, MQ = low product
	mfmq	0		C r0 = low product
	ae	8,0,9		C low limb + old_cy_limb + old cy
	l	7,4(3)		C r7 = res limb
	aze	10,10		C propagate cy to new cy_limb
	a	8,8,7		C add res_limb
	bge	Lp0
	cax	10,10,6		C adjust high limb for negative limb from s1
Lp0:	bdz	Lend0
	lu	0,4(4)		C second copy of the loop body, with the
	stu	8,4(3)		C roles of r9 and r10 exchanged
	cmpi	0,0,0
	mul	9,0,6
	mfmq	0
	ae	8,0,10
	l	7,4(3)
	aze	9,9
	a	8,8,7
	bge	Lp1
	cax	9,9,6		C adjust high limb for negative limb from s1
Lp1:	bdn	Lploop

	b	Lend

C  Loop for s2_limb < 0.  As well as the negative-s1-limb fixup, every
C  iteration must add the multiplicand (the s1 limb, in r0) to the high
C  product; this is folded into the carry-propagating `ae` below.
Lneg:	cax	9,9,0		C add s1 limb: fixup for negative s2_limb
	bdz	Lend
Lnloop:	lu	0,4(4)		C r0 = next s1 limb (s1_ptr updated)
	stu	8,4(3)		C store previous result limb (res_ptr updated)
	cmpi	0,0,0		C cr0 = sign of this s1 limb
	mul	10,0,6		C r10 = high product, MQ = low product
	mfmq	7		C r7 = low product
	ae	8,7,9		C low limb + old_cy_limb + old cy
	l	7,4(3)		C r7 = res limb
	ae	10,10,0		C propagate cy to new cy_limb, and add the
				C s1 limb (negative-s2_limb compensation)
	a	8,8,7		C add res_limb
	bge	Ln0
	cax	10,10,6		C adjust high limb for negative limb from s1
Ln0:	bdz	Lend0
	lu	0,4(4)		C second copy of the loop body, with the
	stu	8,4(3)		C roles of r9 and r10 exchanged
	cmpi	0,0,0
	mul	9,0,6
	mfmq	7
	ae	8,7,10
	l	7,4(3)
	ae	9,9,0		C propagate cy to new cy_limb (+ s1 limb)
	a	8,8,7		C add res_limb
	bge	Ln1
	cax	9,9,6		C adjust high limb for negative limb from s1
Ln1:	bdn	Lnloop
	b	Lend

Lend0:	cal	9,0(10)		C cy_limb ended in r10: move it to r9 (CA kept)
Lend:	st	8,4(3)		C store final result limb
	aze	3,9		C return value = cy_limb + final carry
	br
EPILOGUE(mpn_addmul_1)
116