1dnl  Intel Pentium mpn_mul_basecase -- mpn by mpn multiplication.
2
3dnl  Copyright 1996, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
4dnl
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or
8dnl  modify it under the terms of the GNU Lesser General Public License as
9dnl  published by the Free Software Foundation; either version 3 of the
10dnl  License, or (at your option) any later version.
11dnl
12dnl  The GNU MP Library is distributed in the hope that it will be useful,
13dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
14dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15dnl  Lesser General Public License for more details.
16dnl
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C P5: 14.2 cycles/crossproduct (approx)
24
25
26C void mpn_mul_basecase (mp_ptr wp,
27C                        mp_srcptr xp, mp_size_t xsize,
28C                        mp_srcptr yp, mp_size_t ysize);
29
C Stack frame slots used by mpn_mul_basecase.  The PARAM_ offsets rise in
C steps of 4 in the same order as the prototype arguments: wp, xp, xsize,
C yp, ysize.
30defframe(PARAM_YSIZE, 20)
31defframe(PARAM_YP,    16)
32defframe(PARAM_XSIZE, 12)
33defframe(PARAM_XP,    8)
34defframe(PARAM_WP,    4)
35
C VAR_COUNTER holds the number of outer-loop rows still to do.  Its negative
C offset presumably lands on the slot reserved by the dummy pushl of eax at
C function entry -- confirm against the defframe definition.
36defframe(VAR_COUNTER, -4)
37
C Section and alignment directives for the function that follows.
38	TEXT
39	ALIGN(8)
40PROLOGUE(mpn_mul_basecase)
C Schoolbook multiply: wp[0 .. xsize+ysize-1] = {xp,xsize} * {yp,ysize}.
C The existing comment at the xsize test implies callers pass
C xsize >= ysize >= 1; nothing in this routine checks that.
C
C Structure:
C   1. wp[0] = low limb of xp[0]*yp[0], then L(oop1) finishes the first row
C      {xp,xsize} * yp[0], storing into wp without reading it.
C   2. L(outer) and L(oop2) run ysize-1 more rows, each multiplying
C      {xp,xsize} by the next y limb and adding it into wp one limb higher.
C
C Registers once past the xsize = 1 test:
C   esi  xp + 4*xsize, one past the last x limb
C   edi  wp + 4*xsize for the first row, advanced by 4 after every row
C   ebp  address of the current y limb
C   ecx  negative limb index, counted up to zero
C   ebx  carry limb into the next column
C   eax, edx  mull operand and product
C
C Both inner loops rely on incl and movl leaving the carry flag alone, so
C the carry out of the last addl is still live at the adcl that follows the
C jnz, either at the loop top or just after the loop.
41
42	pushl	%eax			C dummy push, only reserves a stack slot
43	pushl	%esi
44	pushl	%ebp
45	pushl	%edi
C Four 4-byte pushes so far.
46deflit(`FRAME',16)
47
48	movl	PARAM_XP,%esi
49	movl	PARAM_WP,%edi
50	movl	PARAM_YP,%ebp
51
52	movl	(%esi),%eax		C load xp[0]
53	mull	(%ebp)			C multiply by yp[0]
54	movl	%eax,(%edi)		C store to wp[0]
55	movl	PARAM_XSIZE,%ecx	C xsize
56	decl	%ecx			C If xsize = 1, ysize = 1 too
C edx still holds the high limb of xp[0]*yp[0] here; L(done) stores it.
57	jz	L(done)
58
59	movl	PARAM_XSIZE,%eax
60	pushl	%ebx			C saved on this path only, L(done) never pops it
C Presumably adjusts FRAME for the push above -- confirm in the m4 defs.
61FRAME_pushl()
62	movl	%edx,%ebx		C carry limb = high limb of xp[0]*yp[0]
63	leal	(%esi,%eax,4),%esi	C make xp point at end
64	leal	(%edi,%eax,4),%edi	C offset wp by xsize
65	negl	%ecx			C ecx = -(xsize-1), index for inner loop
66	xorl	%eax,%eax		C clear carry for the first adcl, eax is reloaded below
67
C First row: wp[j] = low limb of xp[j]*yp[0] plus carry, j = 1 .. xsize-1.
68	ALIGN(8)
69L(oop1):	adcl	$0,%ebx		C fold carry of the previous addl into the carry limb
70	movl	(%esi,%ecx,4),%eax	C load next limb at xp[j]
71	mull	(%ebp)
72	addl	%ebx,%eax		C add carry limb, CF goes to the next adcl
73	movl	%eax,(%edi,%ecx,4)
74	incl	%ecx			C sets ZF for the jnz, leaves CF untouched
75	movl	%edx,%ebx		C high limb becomes the next carry limb
76	jnz	L(oop1)
77
78	adcl	$0,%ebx			C last carry of the row
79	movl	PARAM_YSIZE,%eax
80	movl	%ebx,(%edi)		C wp[xsize] = top limb of the first row
81	addl	$4,%edi			C advance wp by one limb for the next row
82	decl	%eax
83	jz	L(skip)			C ysize = 1, no further rows
84	movl	%eax,VAR_COUNTER	C outer counter = ysize-1 rows remaining
85
C Each pass adds {xp,xsize} * (next y limb) into wp, one limb higher.
86L(outer):
87	addl	$4,%ebp			C make ebp point to next y limb
88	movl	PARAM_XSIZE,%ecx
89	negl	%ecx			C ecx = -xsize, so the row starts at xp[0]
90	xorl	%ebx,%ebx		C carry limb = 0, also clears CF for the first adcl
91
92	C code at 0x61 here, close enough to aligned
93L(oop2):
94	adcl	$0,%ebx			C fold carry of the wp addition into the carry limb
95	movl	(%esi,%ecx,4),%eax	C load x limb
96	mull	(%ebp)
97	addl	%ebx,%eax		C low limb plus carry limb
98	movl	(%edi,%ecx,4),%ebx	C existing wp limb, movl leaves CF alone
99	adcl	$0,%edx			C carry of the addl two lines up
100	addl	%eax,%ebx		C accumulate into the wp limb, CF goes to the next adcl
101	movl	%ebx,(%edi,%ecx,4)
102	incl	%ecx			C sets ZF for the jnz, leaves CF untouched
103	movl	%edx,%ebx		C high limb becomes the next carry limb
104	jnz	L(oop2)
105
106	adcl	$0,%ebx			C last carry of the row
107
108	movl	%ebx,(%edi)		C new top limb of the partial product
109	addl	$4,%edi
110	movl	VAR_COUNTER,%eax
111	decl	%eax
112	movl	%eax,VAR_COUNTER	C movl keeps ZF from the decl for the jnz
113	jnz	L(outer)
114
C Exit for xsize > 1: restore the four saved registers, drop the dummy slot.
115L(skip):
116	popl	%ebx
117	popl	%edi
118	popl	%ebp
119	popl	%esi
120	addl	$4,%esp			C discard the dummy slot
121	ret
122
C Exit for xsize = 1: ebx was never pushed on this path, edi is still wp.
123L(done):
124	movl	%edx,4(%edi)	C wp[1] = high limb of xp[0]*yp[0]
125	popl	%edi
126	popl	%ebp
127	popl	%esi
128	popl	%eax		C dummy pop for deallocating stack slot
129	ret
130
131EPILOGUE()
132
133