dnl  x86 mpn_addmul_1 and mpn_submul_1 (for 386 and 486) -- Multiply a limb
dnl  vector with a limb and add the result to, or subtract it from, a second
dnl  limb vector.
3
4dnl  Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002, 2005 Free Software
5dnl  Foundation, Inc.
6dnl
7dnl  This file is part of the GNU MP Library.
8dnl
9dnl  The GNU MP Library is free software; you can redistribute it and/or
10dnl  modify it under the terms of the GNU Lesser General Public License as
11dnl  published by the Free Software Foundation; either version 3 of the
12dnl  License, or (at your option) any later version.
13dnl
14dnl  The GNU MP Library is distributed in the hope that it will be useful,
15dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
16dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17dnl  Lesser General Public License for more details.
18dnl
19dnl  You should have received a copy of the GNU Lesser General Public License
20dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
21
22include(`../config.m4')
23
24
C                           cycles/limb
C P5:                           14.75
C P6 model 0-8,10-12             7.5
C P6 model 9  (Banias)
C P6 model 13 (Dothan)           6.75
C P4 model 0  (Willamette)      24.0
C P4 model 1  (?)               24.0
C P4 model 2  (Northwood)       24.0
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C K6:                           12.5
C K7:                            5.25
C K8:
38
39
C Build-time selection of the operation.  Exactly one of OPERATION_addmul_1
C or OPERATION_submul_1 is expected to be defined when this file is processed:
C
C   M4_inst        instruction that combines each product word with the
C                  destination limb (addl or subl)
C   M4_function_1  name of the function that gets emitted
C
C If neither symbol is defined, m4_error is invoked with a diagnostic.

ifdef(`OPERATION_addmul_1',`
      define(M4_inst,        addl)
      define(M4_function_1,  mpn_addmul_1)

',`ifdef(`OPERATION_submul_1',`
      define(M4_inst,        subl)
      define(M4_function_1,  mpn_submul_1)

',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
')')')

C MULFUNC_PROLOGUE is defined outside this file; it is given the names of
C both functions this source can produce.
C NOTE(review): presumably read by the build system to map this one source
C file onto several functions -- confirm against the configure machinery.
MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
52
53
C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                          mp_limb_t mult);
C
C All four arguments are read from the stack.  With FRAME = 0 at function
C entry, dst is at 4(%esp) and each following argument is 4 bytes higher.
C FRAME is set to the number of bytes pushed since entry (0 at entry, 16
C after the four register saves), so the PARAM_* names keep addressing the
C same stack slots after the pushes.

define(PARAM_MULTIPLIER, `FRAME+16(%esp)')
define(PARAM_SIZE,       `FRAME+12(%esp)')
define(PARAM_SRC,        `FRAME+8(%esp)')
define(PARAM_DST,        `FRAME+4(%esp)')

C Section and alignment directives for the code that follows; both macros
C are defined outside this file.
	TEXT
	ALIGN(8)
64
C M4_function_1 is mpn_addmul_1 or mpn_submul_1, depending on which
C OPERATION_* symbol was defined when this file was processed.
C
C Each src limb is multiplied by PARAM_MULTIPLIER.  The low word of the
C product, plus the carry limb from the previous step, is applied to the
C matching dst limb with M4_inst (addl or subl); the high word plus any
C carry or borrow becomes the carry limb for the next step.  The final
C carry limb is returned in %eax.  A size of zero skips both loops and
C returns 0.
C
C Register usage:
C   %edi        dst pointer
C   %esi        src pointer
C   %ecx        loop counter
C   %ebx, %ebp  carry limb (cylimb) and word about to be applied to dst;
C               in the unrolled loop the two swap roles on every limb
C   %edx:%eax   product from mull
C
C Carry-flag discipline: a register that has to become zero between an add
C and the adcl that consumes its carry is cleared with "movl $0", which
C leaves the flags untouched.  In the unrolled loop each mull is issued
C before the M4_inst of the previous limb, because mull rewrites CF and the
C adcl following M4_inst needs the carry or borrow that M4_inst produced.
C Do not reorder these instructions without rechecking the flag flow.

PROLOGUE(M4_function_1)
deflit(`FRAME',0)

C These four registers are used below and restored before ret.
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp
C Four pushes = 16 bytes; rebase the PARAM_* stack offsets accordingly.
deflit(`FRAME',16)

	movl	PARAM_DST,%edi
	movl	PARAM_SRC,%esi
	movl	PARAM_SIZE,%ecx

C Phase 1: process (size mod 4) limbs one at a time, so the unrolled loop
C below only has whole groups of four left to do.
	xorl	%ebx,%ebx		C cylimb = 0
	andl	$3,%ecx			C number of leftover limbs
	jz	L(end0)

L(oop0):
	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER	C edx:eax = src limb * multiplier
	leal	4(%esi),%esi		C advance src
	addl	%ebx,%eax		C low word += incoming cylimb
	movl	$0,%ebx			C zero ebx, CF from the addl is kept
	adcl	%ebx,%edx		C high word += carry of the addl
	M4_inst	%eax,(%edi)		C apply low word to the dst limb
	adcl	%edx,%ebx	C propagate carry into cylimb

	leal	4(%edi),%edi		C advance dst
	decl	%ecx
	jnz	L(oop0)

L(end0):
	movl	PARAM_SIZE,%ecx
	shrl	$2,%ecx			C number of four-limb groups
	jz	L(end)

C Phase 2: four limbs per iteration.  On entry to each iteration %ebx holds
C the incoming cylimb; on exit it holds the outgoing one.
	ALIGN(8)
L(oop):	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER
	addl	%eax,%ebx		C word for limb 0 = low product + cylimb
	movl	$0,%ebp			C zero ebp, CF from the addl is kept
	adcl	%edx,%ebp		C ebp = high product + carry

	movl	4(%esi),%eax
	mull	PARAM_MULTIPLIER
	M4_inst	%ebx,(%edi)		C apply limb 0
	adcl	%eax,%ebp	C new lo + cylimb
	movl	$0,%ebx
	adcl	%edx,%ebx

	movl	8(%esi),%eax
	mull	PARAM_MULTIPLIER
	M4_inst	%ebp,4(%edi)		C apply limb 1
	adcl	%eax,%ebx	C new lo + cylimb
	movl	$0,%ebp
	adcl	%edx,%ebp

	movl	12(%esi),%eax
	mull	PARAM_MULTIPLIER
	M4_inst	%ebx,8(%edi)		C apply limb 2
	adcl	%eax,%ebp	C new lo + cylimb
	movl	$0,%ebx
	adcl	%edx,%ebx

	M4_inst	%ebp,12(%edi)		C apply limb 3
	adcl	$0,%ebx		C propagate carry into cylimb

	leal	16(%esi),%esi		C advance src by four limbs
	leal	16(%edi),%edi		C advance dst by four limbs
	decl	%ecx
	jnz	L(oop)

L(end):	movl	%ebx,%eax		C return value = final cylimb

C Restore the saved registers in reverse push order.
	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

EPILOGUE()
146