1dnl  Intel Pentium mpn_mul_2 -- mpn by 2-limb multiplication.
2
3dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
4dnl
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or
8dnl  modify it under the terms of the GNU Lesser General Public License as
9dnl  published by the Free Software Foundation; either version 3 of the
10dnl  License, or (at your option) any later version.
11dnl
12dnl  The GNU MP Library is distributed in the hope that it will be useful,
13dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
14dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15dnl  Lesser General Public License for more details.
16dnl
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C P5: 24.0 cycles/limb
24
25
26C mp_limb_t mpn_mul_2 (mp_ptr dst, mp_srcptr src, mp_size_t size,
27C                      mp_srcptr mult);
28C
29C At 24 c/l this is only 2 cycles faster than a separate mul_1 and addmul_1,
30C but has the advantage of making just one pass over the operands.
31C
32C There aren't enough registers to use PARAM_MULT directly, so the multiplier
33C limbs are transferred to local variables on the stack.
34
C Names for the four arguments of mpn_mul_2, listed last argument first.
C NOTE(review): the second argument to defframe looks like the byte offset
C of the argument from the stack pointer at function entry -- confirm
C against the defframe definition, which is not in this file.
35defframe(PARAM_MULT, 16)
36defframe(PARAM_SIZE, 12)
37defframe(PARAM_SRC,   8)
38defframe(PARAM_DST,   4)
39
40dnl  re-use parameter space
C The two multiplier limbs are kept in the stack slots of the src and dst
C arguments.  The function copies both pointers into registers before it
C overwrites these slots, so the original argument values are not needed
C again.
41define(VAR_MULT_LOW, `PARAM_SRC')
42define(VAR_MULT_HIGH,`PARAM_DST')
43
	TEXT
	ALIGN(8)
PROLOGUE(mpn_mul_2)
deflit(`FRAME',0)

	C Multiply the size-limb operand at src by the two-limb multiplier
	C at mult.  Limbs 0 to size of the product are stored at dst (size+1
	C limbs in total) and the most significant limb is returned in eax.
	C
	C esi, edi, ebx and ebp are saved on entry and restored before ret.
	C eax, ecx and edx are modified without being saved.  The stack
	C slots of the src and dst arguments are overwritten with the
	C multiplier limbs (VAR_MULT_LOW and VAR_MULT_HIGH).
	C
	C NOTE(review): FRAME_pushl is defined elsewhere; presumably it
	C records each push so the PARAM_ and VAR_ names keep addressing the
	C same stack slots -- confirm against its definition.

	pushl	%esi		FRAME_pushl()
	pushl	%edi		FRAME_pushl()

	movl	PARAM_SRC, %esi
	movl	PARAM_DST, %edi

	movl	PARAM_MULT, %eax
	movl	PARAM_SIZE, %ecx

	movl	4(%eax), %edx		C mult high
	movl	(%eax), %eax		C mult low

	C src and dst are already in esi and edi, so their argument slots
	C can now be reused for the two multiplier limbs.
	movl	%eax, VAR_MULT_LOW
	movl	%edx, VAR_MULT_HIGH

	pushl	%ebx		FRAME_pushl()
	pushl	%ebp		FRAME_pushl()

	C eax still holds mult low here, so this forms edx:eax = src[0] *
	C mult low.
	mull	(%esi)			C src[0] * mult[0]

	C The low product limb is parked in ebp so that src[0] can be
	C reloaded into eax before dst[0] is written.
	movl	%eax, %ebp		C in case src==dst
	movl	(%esi), %eax		C src[0]

	movl	%ebp, (%edi)		C dst[0]
	movl	%edx, %ebx		C initial low carry

	C From here on edi is dst + 4*size, the address of dst[size].
	xorl	%ebp, %ebp		C initial high carry
	leal	(%edi,%ecx,4), %edi	C dst end

	mull	VAR_MULT_HIGH		C src[0] * mult[1]

	C When size-2 is negative (size of 1) there are no further src
	C limbs, so go straight to the final carry store.
	subl	$2, %ecx		C size-2
	js	L(done)

	C ecx is size-2 here, so 8 + 4*ecx equals 4*size.  The xor with -1
	C is a bitwise NOT, which turns size-2 into -(size-1).
	leal	8(%esi,%ecx,4), %esi	C &src[size]
	xorl	$-1, %ecx		C -(size-1)



L(top):
	C eax	low prod
	C ebx	low carry
	C ecx	counter, negative
	C edx	high prod
	C esi	src end
	C edi	dst end
	C ebp	high carry (0 or -1)
	C
	C ecx runs from -(size-1) up to -1, so (esi,ecx,4) and (edi,ecx,4)
	C address limbs 1 to size-1 of src and dst.  On entry edx:eax is the
	C product of the previous src limb and the high multiplier limb.

	C Fold that product into the running carry: ebx gets the value for
	C the current dst limb so far, ebp gets the limb above it.
	andl	$1, %ebp		C 1 or 0
	addl	%eax, %ebx

	adcl	%edx, %ebp
	C NOTE(review): ASSERT is defined elsewhere; presumably it checks, in
	C assertion-enabled builds, that the adcl above left carry clear --
	C confirm against its definition.
	ASSERT(nc)
	movl	(%esi,%ecx,4), %eax

	mull	VAR_MULT_LOW

	C The src limb is reloaded before the dst store below, so the loop
	C also works when src and dst are the same pointer.
	addl	%eax, %ebx		C low carry
	movl	(%esi,%ecx,4), %eax

	C movl does not change the flags, so the carry out of this adcl is
	C still intact when sbbl reads it after the store.
	adcl	%ebp, %edx		C high carry
	movl	%ebx, (%edi,%ecx,4)

	sbbl	%ebp, %ebp		C new high carry, -1 or 0
	movl	%edx, %ebx		C new low carry

	C Product of the current src limb and mult high, consumed at the top
	C of the next iteration or at L(done).
	mull	VAR_MULT_HIGH

	incl	%ecx
	jnz	L(top)


L(done):
	C edx:eax is the product of the last src limb and the high
	C multiplier limb; ebx and ebp hold the carries still to be added.
	andl	$1, %ebp		C 1 or 0
	addl	%ebx, %eax

	adcl	%ebp, %edx
	ASSERT(nc)
	C edi is the address of dst[size], so this writes the limb just
	C above the ones written so far.
	movl	%eax, (%edi)		C store carry low

	movl	%edx, %eax		C return carry high

	C Restore the saved registers in reverse order of the pushes.
	popl	%ebp
	popl	%ebx

	popl	%edi
	popl	%esi

	ret

EPILOGUE()
140