1dnl  Intel Pentium mpn_mul_1 -- mpn by limb multiplication.
2
3dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
4dnl  Inc.
5dnl
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or
9dnl  modify it under the terms of the GNU Lesser General Public License as
10dnl  published by the Free Software Foundation; either version 3 of the
11dnl  License, or (at your option) any later version.
12dnl
13dnl  The GNU MP Library is distributed in the hope that it will be useful,
14dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
15dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16dnl  Lesser General Public License for more details.
17dnl
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23
24C P5: 12.0 cycles/limb
25
26
27C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
28C                      mp_limb_t multiplier);
29C mp_limb_t mpn_mul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
30C                       mp_limb_t multiplier, mp_limb_t carry);
31C
32
C Argument locations for the two entry points, listed in reverse order of
C the C prototypes above.  defframe comes from config.m4 and is not
C visible here; the values are presumably byte offsets from the stack
C pointer at function entry, which the FRAME bookkeeping then adjusts for
C each push -- TODO confirm against config.m4.
C PARAM_CARRY is read only by mpn_mul_1c.
33defframe(PARAM_CARRY,     20)
34defframe(PARAM_MULTIPLIER,16)
35defframe(PARAM_SIZE,      12)
36defframe(PARAM_SRC,       8)
37defframe(PARAM_DST,       4)
38
39	TEXT
40	ALIGN(8)
C mpn_mul_1c -- entry point that takes an initial carry limb.
C
C Loads the carry argument into ecx, pushes esi, and jumps to
C L(start_1c) inside mpn_mul_1, which does all the work and returns
C straight to the caller.  mpn_mul_1 performs the same two steps with ecx
C zeroed instead, so both entry points arrive at L(start_1c) with ecx
C holding the carry-in and one word (esi) pushed.
41PROLOGUE(mpn_mul_1c)
42deflit(`FRAME',0)
43
	C The carry argument is fetched before the push.
44	movl	PARAM_CARRY, %ecx
45	pushl	%esi		FRAME_pushl()
46
	C Continue in the shared body; there is no return to this stub.
47	jmp	L(start_1c)
48
49EPILOGUE()
50
51
52	ALIGN(8)
C mpn_mul_1 -- multiply the size-limb number at src by multiplier, store
C the low size limbs of the result at dst, and return the remaining most
C significant limb in eax.
C
C mpn_mul_1c joins at L(start_1c) with its carry argument already in ecx
C and esi already pushed; that carry is added in at the least significant
C limb.  This entry point uses a carry-in of zero.
C
C A size of one is handled by a short separate path.  Otherwise limbs are
C processed two per loop iteration, with one limb peeled off first when
C size is odd.  The bit shifted out of size by shrl stays in the carry
C flag until the jnc further down, so only instructions that leave the
C flags alone (pushl, movl, leal, notl) appear in between.
C
C NOTE(review): the two-instruction grouping presumably reflects Pentium
C pairing behind the cycles/limb figure quoted at the top of the file;
C instruction order is deliberately left as it is.
53PROLOGUE(mpn_mul_1)
54deflit(`FRAME',0)
55
	C Plain entry: carry-in is zero.
56	xorl	%ecx, %ecx
57	pushl	%esi		FRAME_pushl()
58
59L(start_1c):
	C Common entry: ecx = carry-in, one word (esi) pushed.
60	movl	PARAM_SRC, %esi
61	movl	PARAM_SIZE, %eax
62
	C eax = size/2, carry flag = low bit of size (set when size is odd).
63	shrl	%eax
64	jnz	L(two_or_more)
65
66
67	C one limb only
	C Reached when size/2 is zero.  Computes src[0]*multiplier + ecx,
	C stores the low word at dst[0] and returns the high word in eax.
	C esi is restored before the store, after the last use of a PARAM_
	C operand.
68
69	movl	(%esi), %eax
70
71	mull	PARAM_MULTIPLIER
72
73	addl	%eax, %ecx
74	movl	PARAM_DST, %eax
75
76	adcl	$0, %edx
77	popl	%esi
78
79	movl	%ecx, (%eax)
80	movl	%edx, %eax
81
82	ret
83
84
85L(two_or_more):
86	C eax	size/2
87	C ebx
88	C ecx	carry
89	C edx
90	C esi	src
91	C edi
92	C ebp
	C
	C Save edi, ebx and ebp.  Advance esi and edi by 8*(size/2) bytes and
	C make ebx the negated pair count, so that (%esi,%ebx,8) and
	C (%edi,%ebx,8) start at the first limb and step forward by two limbs
	C each time ebx is incremented towards zero.
93
94	pushl	%edi		FRAME_pushl()
95	pushl	%ebx		FRAME_pushl()
96
97	movl	PARAM_DST, %edi
98	leal	-1(%eax), %ebx		C size/2-1
99
100	notl	%ebx			C -(size/2), preserve carry
101
102	leal	(%esi,%eax,8), %esi	C src end
103	leal	(%edi,%eax,8), %edi	C dst end
104
105	pushl	%ebp		FRAME_pushl()
	C Carry flag is still the low bit of size from the shrl above.
106	jnc	L(top)
107
108
109	C size was odd, process one limb separately
	C
	C The limb at index 0 is multiplied, the carry-in is added to the low
	C word and the high word becomes the new ecx.  esi and edi each move
	C up by one limb so the loop addresses the remaining limbs.  The carry
	C out of the addl into eax is left in the carry flag; the dst
	C adjustment uses leal so that flag reaches the adcl at L(top).
110
111	movl	(%esi,%ebx,8), %eax
112	addl	$4, %esi
113
114	mull	PARAM_MULTIPLIER
115
116	addl	%ecx, %eax
117	movl	%edx, %ecx
118
119	movl	%eax, (%edi,%ebx,8)
120	leal	4(%edi), %edi
121
122
123L(top):
124	C eax
125	C ebx	counter, negative
126	C ecx	carry
127	C edx
128	C esi	src end
129	C edi	dst end
130	C ebp
	C
	C On arrival the carry flag holds a carry not yet added into ecx:
	C clear when coming from the jnc, otherwise the carry out of the
	C odd-limb addl or of the addl near the bottom of the previous
	C iteration (incl and movl do not change it).
131
132	adcl	$0, %ecx
133	movl	(%esi,%ebx,8), %eax
134
	C First limb of the pair: edx:eax = limb * multiplier.
135	mull	PARAM_MULTIPLIER
136
	C ecx = low word + carry (stored below), ebp = high word + carry out.
137	movl	%edx, %ebp
138	addl	%eax, %ecx
139
140	adcl	$0, %ebp
141	movl	4(%esi,%ebx,8), %eax
142
	C Second limb of the pair: edx:eax = limb * multiplier.
143	mull	PARAM_MULTIPLIER
144
	C Store the first result limb; add ebp into the second low word.
	C The carry out of this addl is picked up by the next adcl.
145	movl	%ecx, (%edi,%ebx,8)
146	addl	%ebp, %eax
147
148	movl	%eax, 4(%edi,%ebx,8)
	C incl sets the zero flag for the jnz and leaves the carry flag as is.
149	incl	%ebx
150
151	movl	%edx, %ecx
152	jnz	L(top)
153
154
	C Add the last pending carry into the high word; ecx is the return
	C value.  Registers are restored in reverse order of the pushes.
155	adcl	$0, %ecx
156	popl	%ebp
157
158	movl	%ecx, %eax
159	popl	%ebx
160
161	popl	%edi
162	popl	%esi
163
164	ret
165
166EPILOGUE()
167