1dnl  Intel Pentium mpn_addmul_1/mpn_submul_1 -- add or subtract mpn multiple.
2
3dnl  Copyright 1992, 1994, 1996, 1999, 2000, 2002 Free Software Foundation,
4dnl  Inc.
5dnl
6dnl  This file is part of the GNU MP Library.
7dnl
8dnl  The GNU MP Library is free software; you can redistribute it and/or
9dnl  modify it under the terms of the GNU Lesser General Public License as
10dnl  published by the Free Software Foundation; either version 3 of the
11dnl  License, or (at your option) any later version.
12dnl
13dnl  The GNU MP Library is distributed in the hope that it will be useful,
14dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
15dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16dnl  Lesser General Public License for more details.
17dnl
18dnl  You should have received a copy of the GNU Lesser General Public License
19dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
20
21include(`../config.m4')
22
23
24C P5: 14.0 cycles/limb
25
26
C Operation selection.
C
C This one source provides either the addmul or the submul pair of
C functions, depending on which of OPERATION_addmul_1 or OPERATION_submul_1
C is defined when the file is processed.  The selection sets three macros:
C
C   M4_inst         the instruction that combines the low product limb with
C                   the destination limb, addl or subl
C   M4_function_1   name of the entry point without a carry-in argument
C   M4_function_1c  name of the entry point with a carry-in argument
C
C If neither OPERATION symbol is defined, m4_error is invoked.
27ifdef(`OPERATION_addmul_1', `
28      define(M4_inst,        addl)
29      define(M4_function_1,  mpn_addmul_1)
30      define(M4_function_1c, mpn_addmul_1c)
31
32',`ifdef(`OPERATION_submul_1', `
33      define(M4_inst,        subl)
34      define(M4_function_1,  mpn_submul_1)
35      define(M4_function_1c, mpn_submul_1c)
36
37',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
38')')')
39
C NOTE(review): MULFUNC_PROLOGUE is defined outside this file.  It presumably
C announces every function name this source is able to provide -- confirm
C against the build machinery before changing the list.
40MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
41
42
43C mp_limb_t mpn_addmul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
44C                         mp_limb_t mult);
45C mp_limb_t mpn_addmul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
46C                          mp_limb_t mult, mp_limb_t carry);
47C
48C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
49C                         mp_limb_t mult);
50C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
51C                          mp_limb_t mult, mp_limb_t carry);
52C
53
C Named stack arguments, one per parameter in the prototypes above, in
C 4-byte steps: dst at 4, src at 8, size at 12, mult at 16, carry at 20.
C PARAM_CARRY is read only by the M4_function_1c entry point.
C
C NOTE(review): defframe and FRAME are defined outside this file.  The
C offsets look like distances from the stack pointer at function entry
C (return address at 0), with FRAME tracking later pushes so the names stay
C valid after pushl -- confirm against the x86 m4 definitions.
54defframe(PARAM_CARRY,     20)
55defframe(PARAM_MULTIPLIER,16)
56defframe(PARAM_SIZE,      12)
57defframe(PARAM_SRC,       8)
58defframe(PARAM_DST,       4)
59
60	TEXT
61
C M4_function_1c -- entry point that takes an initial carry limb.
C
C This is only a stub: it puts the carry argument in ecx, saves esi, and
C jumps to L(start_1c) inside M4_function_1, which does all the work and
C returns to the caller.  On arrival at L(start_1c) exactly one register
C has been pushed (esi), the same state M4_function_1 is in when it falls
C through to that label.
62	ALIGN(8)
63PROLOGUE(M4_function_1c)
64deflit(`FRAME',0)
65
	C ecx = carry-in, read while nothing has been pushed yet
66	movl	PARAM_CARRY, %ecx
67	pushl	%esi		FRAME_pushl()
68
	C continue in the shared body; control does not come back here
69	jmp	L(start_1c)
70
71EPILOGUE()
72
73
C M4_function_1 -- multiply src by a single limb and add to, or subtract
C from, dst.
C
C For each i from 0 to size-1, in increasing order:
C   dst[i] = dst[i] OP (low limb of src[i]*mult + carry)
C where OP is M4_inst (addl or subl) and carry is the high limb of the
C previous step plus the carry or borrow flag that step produced.  The
C value returned in eax is the carry limb left after the last step.
C
C This entry starts with carry = 0.  M4_function_1c joins at L(start_1c)
C with its own carry value already in ecx.
C
C The loop body runs once before the counter is tested, so size is assumed
C to be at least 1.
C
C esi, edi and ebx are pushed on entry and popped before ret.  eax, ecx and
C edx are used as scratch.
74	ALIGN(8)
75PROLOGUE(M4_function_1)
76deflit(`FRAME',0)
77
	C ecx = 0, no carry-in for this entry point
78	xorl	%ecx, %ecx
79	pushl	%esi		FRAME_pushl()
80
	C Shared body.  Both entry points get here with ecx = carry-in and
	C esi already pushed.
81L(start_1c):
82	movl	PARAM_SRC, %esi
83	movl	PARAM_SIZE, %eax
84
85	pushl	%edi		FRAME_pushl()
86	pushl	%ebx		FRAME_pushl()
87
88	movl	PARAM_DST, %edi
89	leal	-1(%eax), %ebx		C size-1
90
	C Point esi and edi just past the last limb of src and dst, and make
	C ebx = -size, so that (base,ebx,4) starts at limb 0 and ebx counts
	C up to zero.  The xorl also leaves the carry flag clear for the
	C first adcl in the loop; the leal after it does not change flags.
91	leal	(%esi,%eax,4), %esi
92	xorl	$-1, %ebx		C -size, and clear carry
93
94	leal	(%edi,%eax,4), %edi
95
96L(top):
97	C eax
98	C ebx	counter, negative
99	C ecx	carry
100	C edx
101	C esi	src end
102	C edi	dst end
103	C ebp
104
	C Add the carry or borrow flag from the previous pass (from M4_inst
	C below, or clear on the first pass) into the carry limb.
105	adcl	$0, %ecx
106	movl	(%esi,%ebx,4), %eax
107
	C edx:eax = src limb * mult
108	mull	PARAM_MULTIPLIER
109
	C low limb += carry limb, with the overflow propagated into edx
	C by the adcl below; meanwhile ecx is reloaded with the dst limb
110	addl	%ecx, %eax
111	movl	(%edi,%ebx,4), %ecx
112
113	adcl	$0, %edx
	C dst limb +/- low limb; the flag this sets is consumed by the adcl
	C at L(top), or by the adcl after the loop on the last pass
114	M4_inst	%eax, %ecx
115
116	movl	%ecx, (%edi,%ebx,4)
	C incl sets the zero flag for the jnz but leaves the carry flag
	C alone, and the movl instructions do not touch flags at all
117	incl	%ebx
118
	C high limb becomes the carry limb for the next pass
119	movl	%edx, %ecx
120	jnz	L(top)
121
122
	C fold the final carry or borrow flag into the carry limb
123	adcl	$0, %ecx
124	popl	%ebx
125
	C return value
126	movl	%ecx, %eax
127	popl	%edi
128
129	popl	%esi
130
131	ret
132
133EPILOGUE()
134