1dnl  AMD K6-2 mpn_com -- mpn bitwise one's complement.
2
3dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4dnl
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or
8dnl  modify it under the terms of the GNU Lesser General Public License as
9dnl  published by the Free Software Foundation; either version 3 of the
10dnl  License, or (at your option) any later version.
11dnl
12dnl  The GNU MP Library is distributed in the hope that it will be useful,
13dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
14dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15dnl  Lesser General Public License for more details.
16dnl
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
C NOTE(review): presumably declares the range of GMP_NAIL_BITS values this
C file supports; the code in this file applies GMP_NAIL_BITS to the MMX mask
C and uses notl_or_xorl_GMP_NUMB_MASK on single limbs -- confirm against the
C macro definition.
22NAILS_SUPPORT(0-31)
23
24
25C    alignment dst/src, A=0mod8 N=4mod8
26C       A/A   A/N   N/A   N/N
27C K6-2  1.0   1.18  1.18  1.18  cycles/limb
28C K6    1.5   1.85  1.75  1.85
29
30
31C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
32C
33C Take the bitwise ones-complement of src,size and write it to dst,size.
34
C Argument slots read by mpn_com: size, src and dst.
C NOTE(review): the numbers look like byte offsets of each 4-byte argument
C from the stack pointer at entry (return address at offset 0), adjusted by
C the current FRAME value -- confirm against the defframe definition.
35defframe(PARAM_SIZE,12)
36defframe(PARAM_SRC, 8)
37defframe(PARAM_DST, 4)
38
C NOTE(review): presumably selects the code section and aligns the function
C entry to a 16-byte boundary -- confirm against the TEXT and ALIGN macros.
39	TEXT
40	ALIGN(16)
C Implementation notes:
C   - Limbs are handled two at a time with 64-bit MMX loads and stores,
C     starting with the highest pair and moving down to the lowest.
C   - shrl leaves floor(size/2) in ecx and the low bit of size in the carry
C     flag.  That carry is tested only after the loop, so every instruction
C     between the shrl and the jnc must leave the flags untouched.
C   - ebx is saved and restored around its use as a copy of floor(size/2).
41PROLOGUE(mpn_com)
42deflit(`FRAME',0)
43
	C Fetch the three arguments: ecx = size, eax = src, edx = dst.
44	movl	PARAM_SIZE, %ecx
45	movl	PARAM_SRC, %eax
46	movl	PARAM_DST, %edx
	C ecx = floor(size/2); carry flag = low bit of size.
47	shrl	%ecx
48	jnz	L(two_or_more)
49
	C floor(size/2) is zero here, so a single limb is read from src,
	C complemented and stored to dst.
	C NOTE(review): size == 0 would also reach this path and still touch one
	C limb; presumably callers always pass size >= 1 -- confirm.
	C NOTE(review): notl_or_xorl_GMP_NUMB_MASK is defined elsewhere; going by
	C its name it complements only the non-nail bits -- confirm.
50	movl	(%eax), %eax
51	notl_or_xorl_GMP_NUMB_MASK(	%eax)
52	movl	%eax, (%edx)
53	ret
54
55
56L(two_or_more):
	C ebx is needed as a second register, so preserve the incoming value.
57	pushl	%ebx	FRAME_pushl()
58	pcmpeqd	%mm7, %mm7		C all ones
59
	C Keep floor(size/2) in ebx; ecx is counted down to zero by the loop, and
	C the odd limb (if any) is addressed from ebx afterwards.
60	movl	%ecx, %ebx
	C When GMP_NAIL_BITS is nonzero, shift each 32-bit half of the mask right
	C so the xor in the loop leaves the nail bits of both limbs unchanged.
61ifelse(GMP_NAIL_BITS,0,,
62`	psrld	$GMP_NAIL_BITS, %mm7')	C clear nails
63
64
65
66	ALIGN(8)
67L(top):
68	C eax	src
69	C ebx	floor(size/2)
70	C ecx	counter
71	C edx	dst
72	C
73	C mm0	scratch
74	C mm7	mask
75
	C Each pass handles the limb pair at index ecx-1 (8 bytes), then the loop
	C instruction decrements ecx and branches back while it is nonzero.  The
	C loop instruction does not alter the flags, so the carry from the shrl
	C is still intact on exit.
76	movq	-8(%eax,%ecx,8), %mm0
77	pxor	%mm7, %mm0
78	movq	%mm0, -8(%edx,%ecx,8)
79	loop	L(top)
80
81
	C Carry clear means size was even and every limb has been done.
	C Otherwise the remaining top limb sits at byte offset 8*ebx.
82	jnc	L(no_extra)
83	movl	(%eax,%ebx,8), %eax
84	notl_or_xorl_GMP_NUMB_MASK(	%eax)
85	movl	%eax, (%edx,%ebx,8)
86L(no_extra):
87
	C Restore the ebx value saved on entry to this path.
88	popl	%ebx
	C NOTE(review): emms_or_femms is defined elsewhere; presumably it emits
	C the instruction that ends MMX use before returning -- confirm.
89	emms_or_femms
90	ret
91
92EPILOGUE()
93