1dnl  x86 mpn_mul_1 (for 386, 486, and Pentium Pro) -- Multiply a limb vector
2dnl  with a limb and store the result in a second limb vector.
3
4dnl  Copyright 1992, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2005 Free
5dnl  Software Foundation, Inc.
6dnl
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or
10dnl  modify it under the terms of the GNU Lesser General Public License as
11dnl  published by the Free Software Foundation; either version 3 of the
12dnl  License, or (at your option) any later version.
13dnl
14dnl  The GNU MP Library is distributed in the hope that it will be useful,
15dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
16dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17dnl  Lesser General Public License for more details.
18dnl
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24
25C                           cycles/limb
26C P5:                           12.5
27C P6 model 0-8,10-12             5.5
28C P6 model 9  (Banias)
29C P6 model 13 (Dothan)           5.25
30C P4 model 0  (Willamette)      19.0
31C P4 model 1  (?)               19.0
32C P4 model 2  (Northwood)       19.0
33C P4 model 3  (Prescott)
34C P4 model 4  (Nocona)
35C K6:                           10.5
36C K7:                            4.5
37C K8:
38
39
40C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
41C                      mp_limb_t multiplier);
42
C Stack offsets of the four arguments, in the order of the C prototype:
C dst at 4, src at 8, size at 12, multiplier at 16.
C NOTE(review): presumably these are byte offsets from %esp at function
C entry, adjusted by the current FRAME value when the names are used --
C confirm against the defframe definition.
43defframe(PARAM_MULTIPLIER,16)
44defframe(PARAM_SIZE,      12)
45defframe(PARAM_SRC,       8)
46defframe(PARAM_DST,       4)
47
C mpn_mul_1: multiply the size-limb vector at src by the single limb
C multiplier, store the low limb of each running result at dst, and
C return the final carry limb in %eax.
C
C Register use after the prologue:
C   %edi        dst pointer
C   %esi        src pointer
C   %ecx        loop counter
C   %ebx, %ebp  carry limb; the unrolled loop alternates between the two
C   %edx:%eax   64-bit product written by mull
C
C Structure: L(oop0) handles the size mod 4 leftover limbs one at a time,
C then L(oop) handles the remaining size/4 groups of four limbs.  With
C size 0 both loops are skipped, nothing is stored and 0 is returned.
C
C The high limb plus CF never wraps: a 32x32-bit product plus a 32-bit
C carry limb always fits in 64 bits.
48	TEXT
49	ALIGN(8)
50PROLOGUE(mpn_mul_1)
51deflit(`FRAME',0)
52
C Save the four registers this routine overwrites; they are restored in
C reverse order at L(end).
53	pushl	%edi
54	pushl	%esi
55	pushl	%ebx
56	pushl	%ebp
C Four 4-byte pushes so far, hence FRAME is now 16.
57deflit(`FRAME',16)
58
59	movl	PARAM_DST,%edi
60	movl	PARAM_SRC,%esi
61	movl	PARAM_SIZE,%ecx
62
63	xorl	%ebx,%ebx	C carry limb starts at 0
64	andl	$3,%ecx	C ecx = size mod 4
65	jz	L(end0)	C no leftover limbs
66
C Leftover loop: one limb per iteration, carry limb kept in %ebx.
67L(oop0):
68	movl	(%esi),%eax
69	mull	PARAM_MULTIPLIER	C edx:eax = limb * multiplier
70	leal	4(%esi),%esi
71	addl	%ebx,%eax	C lo + carry limb, sets CF
72	movl	$0,%ebx	C zero with movl so that CF is preserved
73	adcl	%ebx,%edx	C hi += CF
74	movl	%eax,(%edi)
75	movl	%edx,%ebx	C propagate carry into cylimb
76
77	leal	4(%edi),%edi
78	decl	%ecx
79	jnz	L(oop0)
80
81L(end0):
82	movl	PARAM_SIZE,%ecx
83	shrl	$2,%ecx	C ecx = size / 4, the number of four-limb groups
84	jz	L(end)
85
86
C Main loop, unrolled four times.  The carry limb alternates between
C %ebx and %ebp: each step adds the new low product limb into the
C current carry register, which thereby becomes the result limb, and
C collects the high product limb plus CF in the other register.  The
C first three result limbs are stored only after the next mull has been
C issued.  Every movl $0 sits between an addl and its adcl, so it must
C not be replaced by xorl, which would clear CF.
87	ALIGN(8)
88L(oop):	movl	(%esi),%eax
89	mull	PARAM_MULTIPLIER
90	addl	%eax,%ebx	C ebx = result limb 0
91	movl	$0,%ebp
92	adcl	%edx,%ebp	C ebp = hi + CF, the next carry limb
93
94	movl	4(%esi),%eax
95	mull	PARAM_MULTIPLIER
96	movl	%ebx,(%edi)
97	addl	%eax,%ebp	C new lo + cylimb
98	movl	$0,%ebx
99	adcl	%edx,%ebx
100
101	movl	8(%esi),%eax
102	mull	PARAM_MULTIPLIER
103	movl	%ebp,4(%edi)
104	addl	%eax,%ebx	C new lo + cylimb
105	movl	$0,%ebp
106	adcl	%edx,%ebp
107
108	movl	12(%esi),%eax
109	mull	PARAM_MULTIPLIER
110	movl	%ebx,8(%edi)
111	addl	%eax,%ebp	C new lo + cylimb
112	movl	$0,%ebx
113	adcl	%edx,%ebx	C carry limb is back in ebx for the next pass
114
115	movl	%ebp,12(%edi)
116
117	leal	16(%esi),%esi
118	leal	16(%edi),%edi
119	decl	%ecx
120	jnz	L(oop)
121
122L(end):	movl	%ebx,%eax	C return the final carry limb
123
124	popl	%ebp
125	popl	%ebx
126	popl	%esi
127	popl	%edi
128	ret
129
130EPILOGUE()
131