1dnl  IA-64 mpn_invert_limb -- Invert a normalized limb.
2
3dnl  Copyright 2000, 2002, 2004 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of the GNU Lesser General Public License as published
9dnl  by the Free Software Foundation; either version 3 of the License, or (at
10dnl  your option) any later version.
11
12dnl  The GNU MP Library is distributed in the hope that it will be useful, but
13dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15dnl  License for more details.
16
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22C INPUT PARAMETERS
23C d = r32
24
25C           cycles
26C Itanium:    74
27C Itanium 2:  50+6
28
29C It should be possible to avoid the xmpy.hu and the following tests by
30C explicitly chopping in the last fma.  That would save about 10 cycles.
31
C Floating-point constants for mpn_invert_limb.
C
C Each constant is written as four 32-bit words (16 bytes, on a 16-byte
C boundary) and is read by the function with ldfe, i.e. as an 80-bit
C extended value:
C   .LC0 = 2^64   biased exponent 0x403f = 0x3fff + 64
C   .LC1 = 2^128  biased exponent 0x407f = 0x3fff + 128
C Both have significand 0x8000000000000000, i.e. 1.0.
C
C The word layout is chosen by the configured float endianness
C (HAVE_DOUBLE_IEEE_LITTLE_ENDIAN or HAVE_DOUBLE_IEEE_BIG_ENDIAN); any
C other configuration stops the build through m4_error.
C
C The data is placed in .sdata, and the function forms the addresses
C gp-relative with @gprel.
32ASM_START()
33	.sdata
34	.align 16
35ifdef(`HAVE_DOUBLE_IEEE_LITTLE_ENDIAN',`
36.LC0:	data4 0x00000000, 0x80000000, 0x0000403f, 0x00000000	C 2^64
37.LC1:	data4 0x00000000, 0x80000000, 0x0000407f, 0x00000000	C 2^128
38
39',`ifdef(`HAVE_DOUBLE_IEEE_BIG_ENDIAN',`
40.LC0:	data4 0x403f8000, 0x00000000, 0x00000000, 0x00000000	C 2^64
41.LC1:	data4 0x407f8000, 0x00000000, 0x00000000, 0x00000000	C 2^128
42
43',`m4_error(`Oops, need to know float endianness
44')')')
45
46
C mpn_invert_limb -- invert a normalized limb.
C
C In:      r32 = d, the limb to invert.  The file header calls it
C                normalized; presumably that means the most significant
C                bit is set -- confirm against the callers.
C Out:     r8  = di, the inverse limb.
C Writes:  r8, r9, r14, r15, f6-f11, p6.
C
C Method, as shown by the code below:
C   1. d + d is computed; if it wraps to 0 the function returns all ones
C      at once.  The existing comments identify this case as d = 2^63.
C      NOTE(review): d = 0 would also take this path -- presumably never
C      passed in; confirm.
C   2. Otherwise n = 2^128 - d*2^64 is formed in floating point and the
C      quotient n/d is approximated: frcpa supplies a first reciprocal
C      y0 of d, and fma/fnma steps then refine the reciprocal and the
C      quotient side by side.  Those steps are predicated on p6, which
C      frcpa sets when its result still needs refinement; with p6 clear
C      they are skipped and f8 is used as frcpa left it.
C   3. The quotient is truncated to an unsigned integer di.
C   4. The high 64 bits of di*d are compared with the ones complement of
C      d; if they are larger (unsigned) this is treated as overflow and
C      di is decremented by one.
C
C The bare numbers after each stop (C 00, C 01, C 07, ...) are kept from
C the original; presumably they are issue cycle estimates -- not verified.
47PROLOGUE(mpn_invert_limb)
48		C 00
49	addl		r14 = @gprel(.LC0), gp	C r14 = address of the 2^64 constant
50	addl		r15 = @gprel(.LC1), gp	C r15 = address of the 2^128 constant
51	setf.sig	f7 = r32		C f7 = d
52	add		r9 = r32, r32		C check for d = 2^63
53	;;	C 01
54	ldfe		f10 = [r14]		C 2^64
55	ldfe		f8 = [r15]		C 2^128
56	cmp.eq		p6, p0 = 0, r9		C check for d = 2^63
57	mov		r8 = -1			C retval for 2^63
58   (p6)	br.ret.spnt.many b0		C d + d wrapped to 0: return -1
59	;;	C 07
C NOTE(review): the f11 written by the fmpy below is overwritten by the
C e^2 step (under p6) before any read in this function, and is not read
C at all when p6 is clear.  It looks unused -- confirm before removing.
60	fmpy.s1		f11 = f7, f10		C f11 = d * 2^64
61	fnma.s1		f6 = f7, f10, f8	C f6 = 2^128 - d * 2^64
62	;;	C 11
63	frcpa.s1	f8, p6 = f6, f7		C f8 = y0 ~ 1/d, p6 = refinement needed
64	;;	C 15
65   (p6)	fnma.s1		f9 = f7, f8, f1		C f9 = e = 1 - d*y0
66   (p6)	fmpy.s1		f10 = f6, f8		C f10 = q0 = f6*y0
67	;;	C 19
68   (p6)	fmpy.s1		f11 = f9, f9		C f11 = e^2
69   (p6)	fma.s1		f10 = f9, f10, f10	C f10 = q1 = q0 + e*q0
70	;;	C 23
71   (p6)	fma.s1		f8 = f9, f8, f8		C f8 = y1 = y0 + e*y0
72   (p6)	fma.s1		f9 = f11, f10, f10	C f9 = q2 = q1 + e^2*q1
73	;;	C 27
74   (p6)	fma.s1		f8 = f11, f8, f8	C f8 = y2 = y1 + e^2*y1
75   (p6)	fnma.s1		f10 = f7, f9, f6	C f10 = r = f6 - d*q2
76	;;	C 31
77   (p6)	fma.s1		f8 = f10, f8, f9	C f8 = q3 = q2 + r*y2
78	;;	C 35
79	fcvt.fxu.trunc.s1 f8 = f8		C truncate f8 to an unsigned integer
80	;;	C 39
81	getf.sig	r8 = f8			C r8 = di (candidate result)
82	xmpy.hu		f10 = f8, f7		C di * d
83	;;	C 43
84	getf.sig	r14 = f10		C r14 = high 64 bits of di * d
85	andcm		r9 = -1, r32		C one's complement
86	;;	C 48
87	cmp.ltu		p6, p0 = r9, r14	C got overflow?
88	;;	C 49
89   (p6)	add		r8 = -1, r8		C adjust di down
90	br.ret.sptk.many b0
91EPILOGUE()
92ASM_END()
93