dnl  IBM POWER mpn_mul_1 -- Multiply a limb vector with a limb and store the
dnl  result in a second limb vector.

dnl  Copyright 1992, 1994, 1999, 2000, 2001 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


dnl  INPUT PARAMETERS
dnl  res_ptr	r3
dnl  s1_ptr	r4
dnl  size	r5
dnl  s2_limb	r6

dnl  The POWER architecture has no unsigned 32x32->64 bit multiplication
dnl  instruction.  To obtain that operation, we have to use the 32x32->64
dnl  signed multiplication instruction, and add the appropriate compensation to
dnl  the high limb of the result.  We add the multiplicand if the multiplier
dnl  has its most significant bit set, and we add the multiplier if the
dnl  multiplicand has its most significant bit set.  We need to preserve the
dnl  carry flag between each iteration, so we have to compute the compensation
dnl  carefully (the natural approach, srai+and, doesn't work).  Since all POWER
dnl  processors can branch in zero cycles, we use conditional branches for the
dnl  compensation.

include(`../config.m4')

ASM_START()

C mpn_mul_1 (res_ptr, s1_ptr, size, s2_limb)
C
C Multiply the size-limb vector at s1_ptr by s2_limb and store the low size
C limbs of the product at res_ptr.  The most significant product limb plus
C the final carry is left in r3 at exit.
C
C size must be at least 1: the first source limb is loaded, and one result
C limb is stored, before any count check can skip them.
C
C Register usage
C   r0      current s1 limb, later overwritten by the low product word
C   r3      res_ptr biased by -4 for the update-form stores; result at exit
C   r4      s1_ptr, advanced by the update-form loads
C   r5      size, copied into ctr
C   r6      s2_limb
C   r7      compensation term for the first limb only
C   r8      result limb waiting to be stored
C   r9,r10  high product limbs, alternating between the two unrolled halves
C   cr0     sign of s2_limb at first, then sign of each s1 limb
C   CA      carry chained from one limb to the next by ae
C
C The loop body is unrolled twice and exists in two copies: Lploop for a
C non-negative s2_limb and Lnloop for an s2_limb with its top bit set.  The
C only difference is the extra cax that adds the s1 limb to the high word.
C Instruction order matters: cax, cal, mul and cmpi leave CA alone, which is
C what lets the carry survive from one ae to the next.

PROLOGUE(mpn_mul_1)
	cal	3,-4(3)		C bias res_ptr so each stu 8,4(3) lands on the next limb
	l	0,0(4)		C r0 = s1[0]
	cmpi	0,6,0		C cr0 = sign of s2_limb, consumed by blt below
	mtctr	5		C ctr = size
	mul	9,0,6		C signed product: high word in r9, low word in MQ
	srai	7,0,31		C r7 = all ones if s1[0] has its top bit set, else 0
	and	7,7,6		C r7 = s2_limb if s1[0] has its top bit set, else 0
	mfmq	8		C r8 = low product word of the first limb
	ai	0,0,0		C reset carry (srai may have set it); r0 is unchanged
	cax	9,9,7		C adjust high limb for negative s1[0]
	blt	Lneg		C s2_limb has its top bit set: use the Lnloop copy
Lpos:	bdz	Lend		C single limb: r8 and r9 are already final
Lploop:	lu	0,4(4)		C r0 = next s1 limb, s1_ptr advanced
	stu	8,4(3)		C store previous result limb, res_ptr advanced
	cmpi	0,0,0		C cr0 = sign of this s1 limb (before r0 is reused)
	mul	10,0,6
	mfmq	0		C r0 = low product word
	ae	8,0,9		C r8 = low word + previous high limb + carry
	bge	Lp0
	cax	10,10,6		C adjust high limb for negative limb from s1
Lp0:	bdz	Lend0		C done after an odd half: high limb is in r10
	lu	0,4(4)		C second half: same steps with r9 and r10 swapped
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	mfmq	0
	ae	8,0,10
	bge	Lp1
	cax	9,9,6		C adjust high limb for negative limb from s1
Lp1:	bdn	Lploop		C more limbs remain
	b	Lend		C done after an even half: high limb is in r9

Lneg:	cax	9,9,0		C adjust first high limb for negative s2_limb
	bdz	Lend		C single limb: r8 and r9 are already final
Lnloop:	lu	0,4(4)		C r0 = next s1 limb, s1_ptr advanced
	stu	8,4(3)		C store previous result limb, res_ptr advanced
	cmpi	0,0,0		C cr0 = sign of this s1 limb (before r0 is reused)
	mul	10,0,6
	cax	10,10,0		C adjust high limb for negative s2_limb
	mfmq	0		C r0 = low product word (s1 limb no longer needed)
	ae	8,0,9		C r8 = low word + previous high limb + carry
	bge	Ln0
	cax	10,10,6		C adjust high limb for negative limb from s1
Ln0:	bdz	Lend0		C done after an odd half: high limb is in r10
	lu	0,4(4)		C second half: same steps with r9 and r10 swapped
	stu	8,4(3)
	cmpi	0,0,0
	mul	9,0,6
	cax	9,9,0		C adjust high limb for negative s2_limb
	mfmq	0
	ae	8,0,10
	bge	Ln1
	cax	9,9,6		C adjust high limb for negative limb from s1
Ln1:	bdn	Lnloop		C more limbs remain
	b	Lend		C done after an even half: high limb is in r9

Lend0:	cal	9,0(10)		C odd exit: move the final high limb from r10 to r9
Lend:	st	8,4(3)		C store the last result limb
	aze	3,9		C r3 = final high limb + pending carry
	br
EPILOGUE(mpn_mul_1)