/* Copyright (C) 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com>.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.  */

/* As a special exception, if you link this library with other files,
   some of which are compiled with GCC, to produce an executable,
   this library does not by itself cause the resulting executable
   to be covered by the GNU General Public License.
   This exception does not however invalidate any other reasons why
   the executable file might be covered by the GNU General Public License.  */

#ifdef L__divxf3
// Compute an 80-bit IEEE double-extended quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend.  farg1 holds the divisor.
//
// __divtf3 is an alternate symbol name for backward compatibility.
//
// The quotient is returned in fret0.  f10-f14, p6 and p7 are used as
// scratch.  In the comments below a = farg0, b = farg1, y is the running
// reciprocal estimate, q the running quotient estimate, e = 1 - b*y and
// r = a - b*q.

	.text
	.align 16
	.global __divxf3
	.global __divtf3
	.proc __divxf3
__divxf3:
__divtf3:
	// Preset p7; it is cleared again below when frcpa sets p6.
	cmp.eq p7, p0 = r0, r0
	// frcpa leaves either a reciprocal approximation y0 (p6 set) or the
	// finished quotient for a special-case operand (p6 clear) in f10.
	frcpa.s0 f10, p6 = farg0, farg1
	;;
(p6)	cmp.ne p7, p0 = r0, r0
	// From here on p6 and p7 are mutually exclusive; tell the assembler.
	.pred.rel.mutex p6, p7
(p6)	fnma.s1 f11 = farg1, f10, f1	// e0 = 1 - b*y0
(p6)	fma.s1 f12 = farg0, f10, f0	// q0 = a*y0
	;;
(p6)	fma.s1 f13 = f11, f11, f0	// e0^2
(p6)	fma.s1 f14 = f11, f11, f11	// e0 + e0^2
	;;
(p6)	fma.s1 f11 = f13, f13, f11	// e0 + e0^4
(p6)	fma.s1 f13 = f14, f10, f10	// y1 = y0 + y0*(e0 + e0^2)
	;;
(p6)	fma.s1 f10 = f13, f11, f10	// y2 = y0 + y1*(e0 + e0^4)
(p6)	fnma.s1 f11 = farg1, f12, farg0	// r0 = a - b*q0
	;;
(p6)	fma.s1 f11 = f11, f10, f12	// q1 = q0 + r0*y2
(p6)	fnma.s1 f12 = farg1, f10, f1	// e1 = 1 - b*y2
	;;
(p6)	fma.s1 f10 = f12, f10, f10	// y3 = y2 + e1*y2
(p6)	fnma.s1 f12 = farg1, f11, farg0	// r1 = a - b*q1
	;;
	// Final correction step, performed under status field 0.
(p6)	fma.s0 fret0 = f12, f10, f11	// q = q1 + r1*y3
	// Special case: frcpa already produced the result in f10.
(p7)	mov fret0 = f10
	br.ret.sptk rp
	.endp __divxf3
#endif

#ifdef L__divdf3
// Compute a 64-bit IEEE double quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend.  farg1 holds the divisor.
//
// The quotient is returned in fret0.  f10-f13, p6 and p7 are used as
// scratch.  In the comments below a = farg0, b = farg1, y is the running
// reciprocal estimate, q the running quotient estimate and e the running
// error term (e0 = 1 - b*y0, then squared at each step).

	.text
	.align 16
	.global __divdf3
	.proc __divdf3
__divdf3:
	// Preset p7; it is cleared again below when frcpa sets p6.
	cmp.eq p7, p0 = r0, r0
	// frcpa leaves either a reciprocal approximation y0 (p6 set) or the
	// finished quotient for a special-case operand (p6 clear) in f10.
	frcpa.s0 f10, p6 = farg0, farg1
	;;
(p6)	cmp.ne p7, p0 = r0, r0
	// From here on p6 and p7 are mutually exclusive; tell the assembler.
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1 f11 = farg0, f10	// q0 = a*y0
(p6)	fnma.s1 f12 = farg1, f10, f1	// e0 = 1 - b*y0
	;;
(p6)	fma.s1 f11 = f12, f11, f11	// q1 = q0 + e0*q0
(p6)	fmpy.s1 f13 = f12, f12		// e1 = e0^2
	;;
(p6)	fma.s1 f10 = f12, f10, f10	// y1 = y0 + e0*y0
(p6)	fma.s1 f11 = f13, f11, f11	// q2 = q1 + e1*q1
	;;
(p6)	fmpy.s1 f12 = f13, f13		// e2 = e1^2
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1 + e1*y1
	;;
(p6)	fma.d.s1 f11 = f12, f11, f11	// q3 = q2 + e2*q2, rounded to double
(p6)	fma.s1 f10 = f12, f10, f10	// y3 = y2 + e2*y2
	;;
	// f8 is the same register as farg0/fret0; the dividend is read here
	// for the last time.
(p6)	fnma.d.s1 f8 = farg1, f11, farg0	// r = a - b*q3
	;;
	// Final correction step, rounded to double.
(p6)	fma.d fret0 = f8, f10, f11	// q = q3 + r*y3
	// Special case: frcpa already produced the result in f10.
(p7)	mov fret0 = f10
	br.ret.sptk rp
	;;
	.endp __divdf3
#endif

#ifdef L__divsf3
// Compute a 32-bit IEEE float quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend.  farg1 holds the divisor.
//
// The quotient is returned in fret0.  f8-f10, p6 and p7 are used as
// scratch (f8 and f9 are the argument registers themselves and are
// overwritten).  In the comments below a = dividend, b = divisor,
// y0 = reciprocal approximation, q = running quotient estimate and
// e = running error term.

	.text
	.align 16
	.global __divsf3
	.proc __divsf3
__divsf3:
	// Preset p7; it is cleared again below when frcpa sets p6.
	cmp.eq p7, p0 = r0, r0
	// frcpa leaves either a reciprocal approximation y0 (p6 set) or the
	// finished quotient for a special-case operand (p6 clear) in f10.
	frcpa.s0 f10, p6 = farg0, farg1
	;;
(p6)	cmp.ne p7, p0 = r0, r0
	// From here on p6 and p7 are mutually exclusive; tell the assembler.
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1 f8 = farg0, f10		// q0 = a*y0
(p6)	fnma.s1 f9 = farg1, f10, f1	// e0 = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q1 = q0 + e0*q0
(p6)	fmpy.s1 f9 = f9, f9		// e1 = e0^2
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q2 = q1 + e1*q1
(p6)	fmpy.s1 f9 = f9, f9		// e2 = e1^2
	;;
(p6)	fma.d.s1 f10 = f9, f8, f8	// q3 = q2 + e2*q2, rounded to double
	;;
	// Round the double-precision estimate to single under status field 0.
(p6)	fnorm.s.s0 fret0 = f10
	// Special case: frcpa already produced the result in f10.
(p7)	mov fret0 = f10
	br.ret.sptk rp
	;;
	.endp __divsf3
#endif

#ifdef L__divdi3
// Compute a 64-bit integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// The quotient, truncated toward zero, is returned in ret0.  f8-f13, p6
// and p7 are used as scratch.  A zero divisor executes "break 1".
// In the comments below a = dividend, b = divisor, y = reciprocal
// estimate, q = quotient estimate and e = error term.

	.text
	.align 16
	.global __divdi3
	.proc __divdi3
__divdi3:
	// Two input registers (in0, in1); no locals, outputs or rotating regs.
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, so that they won't be treated as unsigned.
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fnma.s1 f11 = f9, f10, f1	// e0 = 1 - b*y0
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11		// e1 = e0^2
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0 + e0*q0
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0 + e0*y0
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1 + e1*q1
	;;
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1 + e1*y1
(p6)	fnma.s1 f12 = f9, f11, f8	// r2 = a - b*q2
	;;
(p6)	fma.s1 f10 = f12, f10, f11	// q3 = q2 + r2*y2
	;;
	// Round quotient to an integer.  When p6 is clear the refinement is
	// skipped and f10 still holds the value written by frcpa.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divdi3
#endif

#ifdef L__moddi3
// Compute a 64-bit integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend (a).  in1 holds the divisor (b).
//
// The remainder a - trunc(a/b)*b is returned in ret0.  f8-f14, p6 and p7
// are used as scratch and in1 is overwritten with -b.  A zero divisor
// executes "break 1".  In the comments below y = reciprocal estimate,
// q = quotient estimate and e = error term.

	.text
	.align 16
	.global __moddi3
	.proc __moddi3
__moddi3:
	// Two input registers (in0, in1); no locals, outputs or rotating regs.
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.  f14 keeps the integer image of a
	// for the final multiply-add.
	setf.sig f14 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, so that they won't be treated as unsigned.
	fcvt.xf f8 = f14
	fcvt.xf f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f11 = f9, f10, f1	// e0 = 1 - b*y0
	;;
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0 + e0*q0
(p6)	fmpy.s1 f13 = f11, f11		// e1 = e0^2
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0 + e0*y0
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1 + e1*q1
	;;
	sub in1 = r0, in1		// in1 = -b
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1 + e1*y1
(p6)	fnma.s1 f12 = f9, f11, f8	// r2 = a - b*q2 (last use of FP b)
	;;
	setf.sig f9 = in1		// f9 = integer image of -b
(p6)	fma.s1 f10 = f12, f10, f11	// q3 = q2 + r2*y2
	;;
	// Truncate the quotient to a signed integer.  When p6 is clear the
	// refinement is skipped and f10 still holds the value from frcpa.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f14
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __moddi3
#endif

#ifdef L__udivdi3
// Compute a 64-bit unsigned integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// The quotient is returned in ret0.  f8-f13, p6 and p7 are used as
// scratch.  A zero divisor executes "break 1".  In the comments below
// a = dividend, b = divisor, y = reciprocal estimate, q = quotient
// estimate and e = error term.

	.text
	.align 16
	.global __udivdi3
	.proc __udivdi3
__udivdi3:
	// Two input registers (in0, in1); no locals, outputs or rotating regs.
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, to avoid FP software-assist faults.
	fcvt.xuf.s1 f8 = f8
	fcvt.xuf.s1 f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fnma.s1 f11 = f9, f10, f1	// e0 = 1 - b*y0
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11		// e1 = e0^2
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0 + e0*q0
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0 + e0*y0
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1 + e1*q1
	;;
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1 + e1*y1
(p6)	fnma.s1 f12 = f9, f11, f8	// r2 = a - b*q2
	;;
(p6)	fma.s1 f10 = f12, f10, f11	// q3 = q2 + r2*y2
	;;
	// Round quotient to an unsigned integer.  When p6 is clear the
	// refinement is skipped and f10 still holds the value from frcpa.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivdi3
#endif

#ifdef L__umoddi3
// Compute a 64-bit unsigned integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend (a).  in1 holds the divisor (b).
//
// The remainder a - (a/b)*b is returned in ret0.  f8-f14, p6 and p7 are
// used as scratch and in1 is overwritten with -b.  A zero divisor
// executes "break 1".  In the comments below y = reciprocal estimate,
// q = quotient estimate and e = error term.

	.text
	.align 16
	.global __umoddi3
	.proc __umoddi3
__umoddi3:
	// Two input registers (in0, in1); no locals, outputs or rotating regs.
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.  f14 keeps the integer image of a
	// for the final multiply-add.
	setf.sig f14 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, to avoid FP software assist faults.
	fcvt.xuf.s1 f8 = f14
	fcvt.xuf.s1 f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f11 = f9, f10, f1	// e0 = 1 - b*y0
	;;
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0 + e0*q0
(p6)	fmpy.s1 f13 = f11, f11		// e1 = e0^2
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0 + e0*y0
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1 + e1*q1
	;;
	sub in1 = r0, in1		// in1 = -b
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1 + e1*y1
(p6)	fnma.s1 f12 = f9, f11, f8	// r2 = a - b*q2 (last use of FP b)
	;;
	setf.sig f9 = in1		// f9 = integer image of -b
(p6)	fma.s1 f10 = f12, f10, f11	// q3 = q2 + r2*y2
	;;
	// Round quotient to an unsigned integer.  When p6 is clear the
	// refinement is skipped and f10 still holds the value from frcpa.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f14
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umoddi3
#endif

#ifdef L__divsi3
// Compute a 32-bit integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// The quotient, truncated toward zero, is returned in ret0.  r2, f8-f11,
// p6 and p7 are used as scratch; in0 and in1 are sign-extended in place.
// A zero divisor executes "break 1".  In the comments below a = dividend,
// b = divisor, y0 = reciprocal approximation, q = quotient estimate and
// e = error term.

	.text
	.align 16
	.global __divsi3
	.proc __divsi3
__divsi3:
	// Two input registers (in0, in1); no locals, outputs or rotating regs.
	.regstk 2,0,0,0
	// Check divide by zero: p7 = (low 32 bits of in1 == 0).  in1 has not
	// been sign-extended yet at this point, so only its low word is
	// tested (cmp4), matching what the other 32-bit routines test after
	// extension.
	cmp4.ne.unc p0,p7=0,in1
	sxt4 in0 = in0
	sxt4 in1 = in1
	;;
	setf.sig f8 = in0
	setf.sig f9 = in1
(p7)	break 1
	;;
	// 0xffdd is the register-format exponent of 2^-34 (bias 0xffff).
	mov r2 = 0x0ffdd
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
	;;
	setf.exp f11 = r2		// f11 = 2^-34
	frcpa.s1 f10, p6 = f8, f9
	;;
(p6)	fmpy.s1 f8 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f9 = f9, f10, f1	// e0 = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q1 = q0 + e0*q0
(p6)	fma.s1 f9 = f9, f9, f11		// e1 = e0^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f9, f8, f8		// q2 = q1 + e1*q1
	;;
	// Truncate to a signed integer.  When p6 is clear the refinement is
	// skipped and f10 still holds the value written by frcpa.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divsi3
#endif

#ifdef L__modsi3
// Compute a 32-bit integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// The remainder a - trunc(a/b)*b is returned in ret0.  r2, f8-f13, p6 and
// p7 are used as scratch; in0 is sign-extended in place and in1 ends up
// holding -b.  A zero divisor executes "break 1".  In the comments below
// a = dividend, b = divisor, y0 = reciprocal approximation, q = quotient
// estimate and e = error term.

	.text
	.align 16
	.global __modsi3
	.proc __modsi3
__modsi3:
	// Two input registers (in0, in1); no locals, outputs or rotating regs.
	.regstk 2,0,0,0
	// 0xffdd is the register-format exponent of 2^-34 (bias 0xffff).
	mov r2 = 0x0ffdd
	sxt4 in0 = in0
	sxt4 in1 = in1
	;;
	// f13 keeps the integer image of a for the final multiply-add.
	setf.sig f13 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	sub in1 = r0, in1		// in1 = -b
	fcvt.xf f8 = f13
	fcvt.xf f9 = f9
	;;
	setf.exp f11 = r2		// f11 = 2^-34
	frcpa.s1 f10, p6 = f8, f9
(p7)	break 1
	;;
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f10 = f9, f10, f1	// e0 = 1 - b*y0 (last use of FP b)
	;;
	setf.sig f9 = in1		// f9 = integer image of -b
(p6)	fma.s1 f12 = f10, f12, f12	// q1 = q0 + e0*q0
(p6)	fma.s1 f10 = f10, f10, f11	// e1 = e0^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f10, f12, f12	// q2 = q1 + e1*q1
	;;
	// Truncate to a signed integer.  When p6 is clear the refinement is
	// skipped and f10 still holds the value written by frcpa.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f13
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __modsi3
#endif

#ifdef L__udivsi3
// Compute a 32-bit unsigned integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// The quotient is returned in ret0.  r2, f8-f11, p6 and p7 are used as
// scratch; in0 and in1 are zero-extended in place.  A zero divisor
// executes "break 1".  In the comments below a = dividend, b = divisor,
// y0 = reciprocal approximation, q = quotient estimate and e = error term.

	.text
	.align 16
	.global __udivsi3
	.proc __udivsi3
__udivsi3:
	// Two input registers (in0, in1); no locals, outputs or rotating regs.
	.regstk 2,0,0,0
	// 0xffdd is the register-format exponent of 2^-34 (bias 0xffff).
	mov r2 = 0x0ffdd
	zxt4 in0 = in0
	zxt4 in1 = in1
	;;
	setf.sig f8 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	// The zero-extended 32-bit operands are non-negative when read as
	// 64-bit signed integers, so the signed conversion is used here.
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
(p7)	break 1
	;;
	setf.exp f11 = r2		// f11 = 2^-34
	frcpa.s1 f10, p6 = f8, f9
	;;
(p6)	fmpy.s1 f8 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f9 = f9, f10, f1	// e0 = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q1 = q0 + e0*q0
(p6)	fma.s1 f9 = f9, f9, f11		// e1 = e0^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f9, f8, f8		// q2 = q1 + e1*q1
	;;
	// Truncate to an unsigned integer.  When p6 is clear the refinement
	// is skipped and f10 still holds the value written by frcpa.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivsi3
#endif

#ifdef L__umodsi3
// Compute a 32-bit unsigned integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// The remainder a - (a/b)*b is returned in ret0.  r2, f8-f13, p6 and p7
// are used as scratch; in0 is zero-extended in place and in1 ends up
// holding -b.  A zero divisor executes "break 1".  In the comments below
// a = dividend, b = divisor, y0 = reciprocal approximation, q = quotient
// estimate and e = error term.

	.text
	.align 16
	.global __umodsi3
	.proc __umodsi3
__umodsi3:
	// Two input registers (in0, in1); no locals, outputs or rotating regs.
	.regstk 2,0,0,0
	// 0xffdd is the register-format exponent of 2^-34 (bias 0xffff).
	mov r2 = 0x0ffdd
	zxt4 in0 = in0
	zxt4 in1 = in1
	;;
	// f13 keeps the integer image of a for the final multiply-add.
	setf.sig f13 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	sub in1 = r0, in1		// in1 = -b
	// The zero-extended 32-bit operands are non-negative when read as
	// 64-bit signed integers, so the signed conversion is used here.
	fcvt.xf f8 = f13
	fcvt.xf f9 = f9
	;;
	setf.exp f11 = r2		// f11 = 2^-34
	frcpa.s1 f10, p6 = f8, f9
(p7)	break 1
	;;
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f10 = f9, f10, f1	// e0 = 1 - b*y0 (last use of FP b)
	;;
	setf.sig f9 = in1		// f9 = integer image of -b
(p6)	fma.s1 f12 = f10, f12, f12	// q1 = q0 + e0*q0
(p6)	fma.s1 f10 = f10, f10, f11	// e1 = e0^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f10, f12, f12	// q2 = q1 + e1*q1
	;;
	// Truncate to an unsigned integer.  When p6 is clear the refinement
	// is skipped and f10 still holds the value written by frcpa.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f13
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umodsi3
#endif

#ifdef L__save_stack_nonlocal
// Notes on save/restore stack nonlocal: We read ar.bsp but write
// ar.bspstore.  This is because ar.bsp can be read at all times
// (independent of the RSE mode) but since it's read-only we need to
// restore the value via ar.bspstore.  This is OK because
// ar.bsp==ar.bspstore after executing "flushrs".

// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
//
// Fills the four 8-byte slots of save_area:
//	save_area +  0	stack_pointer (in1)
//	save_area +  8	ar.bsp
//	save_area + 16	ar.rnat
//	save_area + 24	ar.pfs as read by alloc
// r2 and r16-r19 are used as scratch and in0 is modified.  ar.rsc is left
// with only bits 2-4 of its entry value kept and the two mode bits set.

	.text
	.align 16
	.global __ia64_save_stack_nonlocal
	.proc __ia64_save_stack_nonlocal
__ia64_save_stack_nonlocal:
	{ .mmf
	  alloc r18 = ar.pfs, 2, 0, 0, 0	// r18 = ar.pfs
	  mov r19 = ar.rsc
	  ;;
	}
	{ .mmi
	  flushrs			// write dirty stacked regs to memory
	  st8 [in0] = in1, 24		// slot 0 = stack_pointer; in0 -> slot 3
	  and r19 = 0x1c, r19		// keep bits 2-4, clear the mode bits
	  ;;
	}
	{ .mmi
	  st8 [in0] = r18, -16		// slot 3 = ar.pfs; in0 -> slot 1
	  mov ar.rsc = r19		// RSE mode 0 while ar.rnat is read
	  or r19 = 0x3, r19		// value written back on exit
	  ;;
	}
	{ .mmi
	  mov r16 = ar.bsp
	  mov r17 = ar.rnat
	  adds r2 = 8, in0		// r2 -> slot 2
	  ;;
	}
	{ .mmi
	  st8 [in0] = r16		// slot 1 = ar.bsp
	  st8 [r2] = r17		// slot 2 = ar.rnat
	}
	{ .mib
	  mov ar.rsc = r19
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_save_stack_nonlocal
#endif

#ifdef L__nonlocal_goto
// void __ia64_nonlocal_goto(void *target_label, void *save_area,
//			     void *static_chain);
//
// Reloads the four 8-byte slots of save_area in order into r12,
// ar.bspstore, ar.rnat and ar.pfs, copies static_chain into r15 and then
// branches to target_label through rp.  This routine does not return to
// its caller.  r16-r20 are used as scratch and in1 is modified.

	.text
	.align 16
	.global __ia64_nonlocal_goto
	.proc __ia64_nonlocal_goto
__ia64_nonlocal_goto:
	{ .mmi
	  alloc r20 = ar.pfs, 3, 0, 0, 0
	  ld8 r12 = [in1], 8		// r12 = slot 0
	  // rp = target_label; .L0 tags the bundle holding the branch.
	  mov.ret.sptk rp = in0, .L0
	  ;;
	}
	{ .mmf
	  ld8 r16 = [in1], 8		// r16 = slot 1 (for ar.bspstore)
	  mov r19 = ar.rsc
	  ;;
	}
	{ .mmi
	  flushrs			// write dirty stacked regs to memory
	  ld8 r17 = [in1], 8		// r17 = slot 2 (for ar.rnat)
	  and r19 = 0x1c, r19		// keep bits 2-4, clear the mode bits
	  ;;
	}
	{ .mmi
	  ld8 r18 = [in1]		// r18 = slot 3 (for ar.pfs)
	  mov ar.rsc = r19		// RSE mode 0 for the writes below
	  or r19 = 0x3, r19		// value written back at .L0
	  ;;
	}
	{ .mmi
	  mov ar.bspstore = r16
	  ;;
	  mov ar.rnat = r17
	  ;;
	}
	{ .mmi
	  loadrs
	  invala			// invalidate the ALAT
	  mov r15 = in2			// r15 = static_chain
	  ;;
	}
.L0:	{ .mib
	  mov ar.rsc = r19
	  mov ar.pfs = r18
	  br.ret.sptk.few rp		// continue at target_label
	  ;;
	}
	.endp __ia64_nonlocal_goto
#endif

#ifdef L__restore_stack_nonlocal
// This is mostly the same as nonlocal_goto above.
// ??? This has not been tested yet.

// void __ia64_restore_stack_nonlocal(void *save_area)
//
// Reloads the four 8-byte slots of save_area in order into r12,
// ar.bspstore, ar.rnat and ar.pfs, then returns through rp.  r16-r20 are
// used as scratch and in0 is modified.
//
// NOTE(review): alloc declares 4 input registers although the prototype
// above has a single argument and only in0 is referenced; left as is
// because this routine is marked untested.

	.text
	.align 16
	.global __ia64_restore_stack_nonlocal
	.proc __ia64_restore_stack_nonlocal
__ia64_restore_stack_nonlocal:
	{ .mmf
	  alloc r20 = ar.pfs, 4, 0, 0, 0
	  ld8 r12 = [in0], 8		// r12 = slot 0
	  ;;
	}
	{ .mmb
	  ld8 r16=[in0], 8		// r16 = slot 1 (for ar.bspstore)
	  mov r19 = ar.rsc
	  ;;
	}
	{ .mmi
	  flushrs			// write dirty stacked regs to memory
	  ld8 r17 = [in0], 8		// r17 = slot 2 (for ar.rnat)
	  and r19 = 0x1c, r19		// keep bits 2-4, clear the mode bits
	  ;;
	}
	{ .mmf
	  ld8 r18 = [in0]		// r18 = slot 3 (for ar.pfs)
	  mov ar.rsc = r19		// RSE mode 0 for the writes below
	  ;;
	}
	{ .mmi
	  mov ar.bspstore = r16
	  ;;
	  mov ar.rnat = r17
	  or r19 = 0x3, r19		// value written back before returning
	  ;;
	}
	{ .mmf
	  loadrs
	  invala			// invalidate the ALAT
	  ;;
	}
	{ .mib
	  mov ar.rsc = r19
	  mov ar.pfs = r18
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_restore_stack_nonlocal
#endif

#ifdef L__trampoline
// Implement the nested function trampoline.  This is out of line
// so that we don't have to bother with flushing the icache, as
// well as making the on-stack trampoline smaller.
//
// The trampoline has the following form:
//
//		+-------------------+ >
//	TRAMP:	| __ia64_trampoline | |
//		+-------------------+  > fake function descriptor
//		| TRAMP+16          | |
//		+-------------------+ >
//		| target descriptor |
//		+-------------------+
//		| static link	    |
//		+-------------------+
//
// On entry r1 is read as a pointer to the "target descriptor" slot
// (TRAMP+16, the second word of the fake descriptor above).  The code
// loads the static link into r15, loads the two words of the target
// descriptor into b6 and r1, and branches to b6.  r2 and r3 are used as
// scratch.

	.text
	.align 16
	.global __ia64_trampoline
	.proc __ia64_trampoline
__ia64_trampoline:
	{ .mmi
	  ld8 r2 = [r1], 8		// r2 = target descriptor; r1 -> static link
	  ;;
	  ld8 r15 = [r1]		// r15 = static link
	}
	{ .mmi
	  ld8 r3 = [r2], 8		// r3 = first word of target descriptor
	  ;;
	  ld8 r1 = [r2]			// r1 = second word of target descriptor
	  mov b6 = r3
	}
	{ .bbb
	  br.sptk.many b6		// jump to the target; rp is untouched
	  ;;
	}
	.endp __ia64_trampoline
#endif

// Thunks for backward compatibility.
#ifdef L_fixtfdi
// __fixtfti: tail-branch to __fixxfti.  No registers are touched, so the
// arguments and rp reach __fixxfti unchanged.
// NOTE(review): the guard macro says "di" while the symbol says "ti";
// presumably the build system maps these names -- confirm before renaming.
	.text
	.align 16
	.global __fixtfti
	.proc __fixtfti
__fixtfti:
	{ .bbb
	  br.sptk.many __fixxfti
	  ;;
	}
	.endp __fixtfti
#endif
#ifdef L_fixunstfdi
// __fixunstfti: backward-compatibility thunk that tail-branches to
// __fixunsxfti.  No registers are touched, so the arguments and rp reach
// __fixunsxfti unchanged.
	// Select the code section explicitly instead of relying on the
	// assembler default.
	.text
	.align 16
	.global __fixunstfti
	.proc __fixunstfti
__fixunstfti:
	{ .bbb
	  br.sptk.many __fixunsxfti
	  ;;
	}
	.endp __fixunstfti
#endif
#ifdef L_floatditf
// __floattitf: backward-compatibility thunk that tail-branches to
// __floattixf.  No registers are touched, so the arguments and rp reach
// __floattixf unchanged.
	// Select the code section explicitly instead of relying on the
	// assembler default.
	.text
	.align 16
	.global __floattitf
	.proc __floattitf
__floattitf:
	{ .bbb
	  br.sptk.many __floattixf
	  ;;
	}
	.endp __floattitf
#endif