1/* Pentium __mpn_sub_n -- Subtract two limb vectors of the same length > 0
2   and store difference in a third limb vector.
3   Copyright (C) 1992, 94, 95, 96, 97, 98, 2000 Free Software Foundation, Inc.
4   This file is part of the GNU MP Library.
5
6   The GNU MP Library is free software; you can redistribute it and/or modify
7   it under the terms of the GNU Lesser General Public License as published by
8   the Free Software Foundation; either version 2.1 of the License, or (at your
9   option) any later version.
10
11   The GNU MP Library is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
14   License for more details.
15
16   You should have received a copy of the GNU Lesser General Public License
17   along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
18   the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
19   MA 02111-1307, USA. */
20
21#include "sysdep.h"
22#include "asm-syntax.h"
23#include "bp-sym.h"
24#include "bp-asm.h"
25
/* Offsets, relative to %esp, of the incoming stack arguments as seen after
   the function has pushed its four 4-byte callee-saved registers (hence the
   +16).  Successive arguments are PTR_SIZE bytes apart.
   NOTE(review): LINKAGE and PTR_SIZE are not defined in this file;
   presumably they come from the included headers -- confirm there.

     RES   destination limb vector (receives the difference)
     S1    first source limb vector (minuend)
     S2    second source limb vector (subtrahend)
     SIZE  number of limbs in each vector  */
26#define PARMS	LINKAGE+16		/* space for 4 saved regs */
27#define RES	PARMS
28#define S1	RES+PTR_SIZE
29#define S2	S1+PTR_SIZE
30#define SIZE	S2+PTR_SIZE
31
/* Everything that follows is emitted into the code section.  */
32	.text
/* __mpn_sub_n (RES, S1, S2, SIZE)

   Computes RES[i] = S1[i] - S2[i] - borrow for i = 0 .. SIZE-1, walking the
   vectors from the lowest address upward and propagating the borrow from
   limb to limb.  Limbs are 32-bit words (every access is a movl/sbbl with a
   4-byte stride).  The final borrow, 0 or 1, is returned in %eax.

   SIZE must be at least 1: the first S2 limb is loaded unconditionally and
   the code at L(end2) always processes one limb.

   Register roles after the prologue:
     %edi  write pointer into RES
     %esi  read pointer into S1
     %ebx  read pointer into S2
     %ebp  S2 limb that was loaded ahead for the next subtraction
     %ecx  number of 8-limb iterations still to run, (SIZE-1) >> 3
     %edx  number of single-limb iterations, (SIZE-1) & 7; it is spilled to
           the stack while the unrolled loop uses %edx for data
     %eax  scratch / result limb, and the return value

   Shape of the computation:
     L(oop)   8 limbs per iteration, as four pairs
     L(oop2)  1 limb per iteration, for the (SIZE-1) & 7 leftover limbs
     L(end2)  the last limb, then conversion of the borrow to 0/1

   The borrow lives in the carry flag (CF) from the testl below until the
   final sbbl.  Between the sbbl instructions only movl, leal, pushl/popl,
   decl/incl and conditional jumps are executed, none of which change CF;
   this is why pointers are advanced with leal and counters with decl.
   Do not reorder or replace these instructions without re-checking CF.

   NOTE(review): ENTRY, END, BP_SYM, ENTER, LEAVE, ALIGN, L() and
   CHECK_BOUNDS_BOTH_WIDE are macros defined outside this file; their
   expansions are not visible here.  */
33ENTRY (BP_SYM (__mpn_sub_n))
34	ENTER
35
	/* Save the four registers this routine uses besides %eax/%ecx/%edx.
	   These 16 bytes are the "+16" in PARMS.  */
36	pushl	%edi
37	pushl	%esi
38	pushl	%ebp
39	pushl	%ebx
40
	/* Fetch the arguments from the stack.  */
41	movl	RES(%esp),%edi
42	movl	S1(%esp),%esi
43	movl	S2(%esp),%ebx
44	movl	SIZE(%esp),%ecx
45#if __BOUNDED_POINTERS__
	/* Bounds-check each vector over SIZE*4 bytes, then undo the scaling
	   so %ecx is a limb count again.  */
46	shll	$2, %ecx	/* convert limbs to bytes */
47	CHECK_BOUNDS_BOTH_WIDE (%edi, RES(%esp), %ecx)
48	CHECK_BOUNDS_BOTH_WIDE (%esi, S1(%esp), %ecx)
49	CHECK_BOUNDS_BOTH_WIDE (%ebx, S2(%esp), %ecx)
50	shrl	$2, %ecx
51#endif
	/* Preload S2[0]; from here on %ebp always holds the S2 limb for the
	   next sbbl.  */
52	movl	(%ebx),%ebp
53
	/* Split SIZE-1 into %ecx = (SIZE-1)/8 unrolled iterations and
	   %edx = (SIZE-1)%8 single-limb iterations.  The remaining one limb
	   is handled at L(end2).  */
54	decl	%ecx
55	movl	%ecx,%edx
56	shrl	$3,%ecx
57	andl	$7,%edx
	/* testl clears CF (no incoming borrow) and sets ZF when there are no
	   unrolled iterations to run.  */
58	testl	%ecx,%ecx		/* zero carry flag */
59	jz	L(end)
	/* %edx is needed as a data register in the unrolled loop, so park the
	   leftover count on the stack.  pushl leaves the flags alone.  */
60	pushl	%edx
61
62	ALIGN (3)
	/* Unrolled loop: 8 limbs (32 bytes) per iteration.  The value read
	   into %eax on the next line is discarded (overwritten at L(1)); the
	   load only touches the destination.  %edi is advanced first, so the
	   stores below use negative offsets -32 .. -4.  */
63L(oop):	movl	28(%edi),%eax		/* fetch destination cache line */
64	leal	32(%edi),%edi
65
	/* Each of the four groups L(1)..L(4) handles two limbs: load two S1
	   limbs into %eax/%edx, subtract the S2 limbs via %ebp (reloading
	   %ebp with the next S2 limb after each use), then store both
	   results.  The labels L(1)..L(4) are not referenced in this file.  */
66L(1):	movl	(%esi),%eax
67	movl	4(%esi),%edx
68	sbbl	%ebp,%eax
69	movl	4(%ebx),%ebp
70	sbbl	%ebp,%edx
71	movl	8(%ebx),%ebp
72	movl	%eax,-32(%edi)
73	movl	%edx,-28(%edi)
74
75L(2):	movl	8(%esi),%eax
76	movl	12(%esi),%edx
77	sbbl	%ebp,%eax
78	movl	12(%ebx),%ebp
79	sbbl	%ebp,%edx
80	movl	16(%ebx),%ebp
81	movl	%eax,-24(%edi)
82	movl	%edx,-20(%edi)
83
84L(3):	movl	16(%esi),%eax
85	movl	20(%esi),%edx
86	sbbl	%ebp,%eax
87	movl	20(%ebx),%ebp
88	sbbl	%ebp,%edx
89	movl	24(%ebx),%ebp
90	movl	%eax,-16(%edi)
91	movl	%edx,-12(%edi)
92
	/* The load from 32(%ebx) below fetches the first S2 limb of the next
	   chunk.  It is in range because at least one limb (the one handled
	   at L(end2)) always remains after the unrolled loop.  */
93L(4):	movl	24(%esi),%eax
94	movl	28(%esi),%edx
95	sbbl	%ebp,%eax
96	movl	28(%ebx),%ebp
97	sbbl	%ebp,%edx
98	movl	32(%ebx),%ebp
99	movl	%eax,-8(%edi)
100	movl	%edx,-4(%edi)
101
	/* Advance the source pointers and count down.  leal and decl keep CF,
	   so the borrow carries over into the next iteration.  */
102	leal	32(%esi),%esi
103	leal	32(%ebx),%ebx
104	decl	%ecx
105	jnz	L(oop)
106
	/* Recover the leftover-limb count saved before the loop.  */
107	popl	%edx
108L(end):
	/* Skip the single-limb loop when %edx is 0: decl turns 0 into -1 and
	   sets the sign flag without touching CF.  Otherwise incl restores
	   the count (also without touching CF).  */
109	decl	%edx			/* test %edx w/o clobbering carry */
110	js	L(end2)
111	incl	%edx
	/* Single-limb loop: one subtraction per iteration, again keeping the
	   next S2 limb preloaded in %ebp.  */
112L(oop2):
113	leal	4(%edi),%edi
114	movl	(%esi),%eax
115	sbbl	%ebp,%eax
116	movl	4(%ebx),%ebp
117	movl	%eax,-4(%edi)
118	leal	4(%esi),%esi
119	leal	4(%ebx),%ebx
120	decl	%edx
121	jnz	L(oop2)
	/* Last limb: %ebp already holds the final S2 limb.  */
122L(end2):
123	movl	(%esi),%eax
124	sbbl	%ebp,%eax
125	movl	%eax,(%edi)
126
	/* Turn the final borrow into the return value: sbbl gives 0 or -1
	   depending on CF, negl maps that to 0 or 1.  */
127	sbbl	%eax,%eax
128	negl	%eax
129
	/* Restore the saved registers in reverse order of the pushes.  */
130	popl	%ebx
131	popl	%ebp
132	popl	%esi
133	popl	%edi
134
135	LEAVE
136	ret
137END (BP_SYM (__mpn_sub_n))
138