lib1funcs.asm revision 132718
1132718Skan#ifdef L__divxf3
290075Sobrien// Compute an 80-bit IEEE double-extended quotient.
390075Sobrien//
490075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
590075Sobrien// alternative.
690075Sobrien//
790075Sobrien// farg0 holds the dividend.  farg1 holds the divisor.
8132718Skan//
9132718Skan// __divtf3 is an alternate symbol name for backward compatibility.
//
// The quotient is written to fret0.  Registers written along the way:
// f10-f14, p6 and p7.  In the comments below a = farg0, b = farg1 and
// y denotes the running reciprocal estimate kept in f10.
1090075Sobrien
1190075Sobrien	.text
1290075Sobrien	.align 16
13132718Skan	.global __divxf3
1490075Sobrien	.global __divtf3
15132718Skan	.proc __divxf3
16132718Skan__divxf3:
1790075Sobrien__divtf3:
	// r0 == r0 always holds, so p7 starts out set.
1890075Sobrien	cmp.eq p7, p0 = r0, r0
	// f10 = initial reciprocal estimate; p6 gates every refinement step.
	// When p6 ends up clear, f10 is returned unchanged via the p7 path
	// (presumably the cases where frcpa already produced the final
	// result -- confirm against the frcpa description).
1990075Sobrien	frcpa.s0 f10, p6 = farg0, farg1
2090075Sobrien	;;
	// If p6 is set, clear p7 (r0 != r0 never holds), so exactly one of
	// p6/p7 is set from here on.
2190075Sobrien(p6)	cmp.ne p7, p0 = r0, r0
	// Tell the assembler that p6 and p7 are mutually exclusive.
2290075Sobrien	.pred.rel.mutex p6, p7
	// f11 = e = 1 - b*y ; f12 = q0 = a*y
2390075Sobrien(p6)	fnma.s1 f11 = farg1, f10, f1
2490075Sobrien(p6)	fma.s1 f12 = farg0, f10, f0
2590075Sobrien	;;
	// f13 = e^2 ; f14 = e + e^2
2690075Sobrien(p6)	fma.s1 f13 = f11, f11, f0
2790075Sobrien(p6)	fma.s1 f14 = f11, f11, f11
2890075Sobrien	;;
	// f11 = e + e^4 ; f13 = y + y*(e + e^2)
2990075Sobrien(p6)	fma.s1 f11 = f13, f13, f11
3090075Sobrien(p6)	fma.s1 f13 = f14, f10, f10
3190075Sobrien	;;
	// f10 = y + f13*(e + e^4)  (refined reciprocal)
	// f11 = r0 = a - b*q0      (remainder of the first quotient)
3290075Sobrien(p6)	fma.s1 f10 = f13, f11, f10
3390075Sobrien(p6)	fnma.s1 f11 = farg1, f12, farg0
3490075Sobrien	;;
	// f11 = q1 = q0 + r0*y ; f12 = 1 - b*y  (error of the refined y)
3590075Sobrien(p6)	fma.s1 f11 = f11, f10, f12
3690075Sobrien(p6)	fnma.s1 f12 = farg1, f10, f1
3790075Sobrien	;;
	// f10 = y + f12*y ; f12 = r1 = a - b*q1
3890075Sobrien(p6)	fma.s1 f10 = f12, f10, f10
3990075Sobrien(p6)	fnma.s1 f12 = farg1, f11, farg0
4090075Sobrien	;;
	// Final correction q1 + r1*y; this is the only refinement step issued
	// with the .s0 completer (all earlier ones use .s1).
4190075Sobrien(p6)	fma.s0 fret0 = f12, f10, f11
	// p6 clear: return the frcpa result as-is.
4290075Sobrien(p7)	mov fret0 = f10
4390075Sobrien	br.ret.sptk rp
44132718Skan	.endp __divxf3
4590075Sobrien#endif
4690075Sobrien
4790075Sobrien#ifdef L__divdf3
4890075Sobrien// Compute a 64-bit IEEE double quotient.
4990075Sobrien//
5090075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
5190075Sobrien// alternative.
5290075Sobrien//
5390075Sobrien// farg0 holds the dividend.  farg1 holds the divisor.
//
// The quotient is written to fret0.  Registers written along the way:
// f8, f10-f13, p6 and p7.  In the comments below a = farg0, b = farg1,
// y is the reciprocal estimate in f10 and q the quotient estimate in f11.
5490075Sobrien
5590075Sobrien	.text
5690075Sobrien	.align 16
5790075Sobrien	.global __divdf3
5890075Sobrien	.proc __divdf3
5990075Sobrien__divdf3:
	// p7 starts out set (r0 == r0); it is cleared below when p6 is set.
6090075Sobrien	cmp.eq p7, p0 = r0, r0
	// f10 = initial reciprocal estimate; p6 gates the refinement steps.
6190075Sobrien	frcpa.s0 f10, p6 = farg0, farg1
6290075Sobrien	;;
	// Make p7 the complement of p6.
6390075Sobrien(p6)	cmp.ne p7, p0 = r0, r0
6490075Sobrien	.pred.rel.mutex p6, p7
	// q = a*y ; f12 = e = 1 - b*y
6590075Sobrien(p6)	fmpy.s1 f11 = farg0, f10
6690075Sobrien(p6)	fnma.s1 f12 = farg1, f10, f1
6790075Sobrien	;;
	// q = q + e*q ; f13 = e^2
6890075Sobrien(p6)	fma.s1 f11 = f12, f11, f11
6990075Sobrien(p6)	fmpy.s1 f13 = f12, f12
7090075Sobrien	;;
	// y = y + e*y ; q = q + e^2*q
7190075Sobrien(p6)	fma.s1 f10 = f12, f10, f10
7290075Sobrien(p6)	fma.s1 f11 = f13, f11, f11
7390075Sobrien	;;
	// f12 = e^4 ; y = y + e^2*y
7490075Sobrien(p6)	fmpy.s1 f12 = f13, f13
7590075Sobrien(p6)	fma.s1 f10 = f13, f10, f10
7690075Sobrien	;;
	// q = q + e^4*q (with the .d completer) ; y = y + e^4*y
7790075Sobrien(p6)	fma.d.s1 f11 = f12, f11, f11
7890075Sobrien(p6)	fma.s1 f10 = f12, f10, f10
7990075Sobrien	;;
	// f8 = r = a - b*q  (remainder of the current quotient)
8090075Sobrien(p6)	fnma.d.s1 f8 = farg1, f11, farg0
8190075Sobrien	;;
	// Final correction q + r*y.  Note this fma carries no explicit
	// status-field completer, unlike the .s1 steps above.
8290075Sobrien(p6)	fma.d fret0 = f8, f10, f11
	// p6 clear: return the frcpa result as-is.
8390075Sobrien(p7)	mov fret0 = f10
8490075Sobrien	br.ret.sptk rp
8590075Sobrien	;;
8690075Sobrien	.endp __divdf3
8790075Sobrien#endif
8890075Sobrien
8990075Sobrien#ifdef L__divsf3
9090075Sobrien// Compute a 32-bit IEEE float quotient.
9190075Sobrien//
9290075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
9390075Sobrien// alternative.
9490075Sobrien//
9590075Sobrien// farg0 holds the dividend.  farg1 holds the divisor.
//
// The quotient is written to fret0.  Registers written along the way:
// f8, f9, f10, p6 and p7.  In the comments below a = farg0, b = farg1
// and y is the frcpa estimate in f10.
9690075Sobrien
9790075Sobrien	.text
9890075Sobrien	.align 16
9990075Sobrien	.global __divsf3
10090075Sobrien	.proc __divsf3
10190075Sobrien__divsf3:
	// p7 starts out set (r0 == r0); it is cleared below when p6 is set.
10290075Sobrien	cmp.eq p7, p0 = r0, r0
	// f10 = initial reciprocal estimate; p6 gates the refinement steps.
10390075Sobrien	frcpa.s0 f10, p6 = farg0, farg1
10490075Sobrien	;;
	// Make p7 the complement of p6.
10590075Sobrien(p6)	cmp.ne p7, p0 = r0, r0
10690075Sobrien	.pred.rel.mutex p6, p7
	// f8 = q = a*y ; f9 = e = 1 - b*y
10790075Sobrien(p6)	fmpy.s1 f8 = farg0, f10
10890075Sobrien(p6)	fnma.s1 f9 = farg1, f10, f1
10990075Sobrien	;;
	// q = q + e*q ; f9 = e^2
11090075Sobrien(p6)	fma.s1 f8 = f9, f8, f8
11190075Sobrien(p6)	fmpy.s1 f9 = f9, f9
11290075Sobrien	;;
	// q = q + e^2*q ; f9 = e^4
11390075Sobrien(p6)	fma.s1 f8 = f9, f8, f8
11490075Sobrien(p6)	fmpy.s1 f9 = f9, f9
11590075Sobrien	;;
	// f10 = q + e^4*q, computed with the .d completer.
11690075Sobrien(p6)	fma.d.s1 f10 = f9, f8, f8
11790075Sobrien	;;
	// Deliver the result through fnorm with the .s completer (.s0 field).
11890075Sobrien(p6)	fnorm.s.s0 fret0 = f10
	// p6 clear: return the frcpa result as-is.
11990075Sobrien(p7)	mov fret0 = f10
12090075Sobrien	br.ret.sptk rp
12190075Sobrien	;;
12290075Sobrien	.endp __divsf3
12390075Sobrien#endif
12490075Sobrien
12590075Sobrien#ifdef L__divdi3
12690075Sobrien// Compute a 64-bit integer quotient.
12790075Sobrien//
12890075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
12990075Sobrien// alternative.
13090075Sobrien//
13190075Sobrien// in0 holds the dividend.  in1 holds the divisor.
//
// The quotient is returned in ret0.  Registers written along the way:
// f8-f13 and p6.  In the comments below a and b are the converted
// dividend and divisor (f8, f9) and y is the reciprocal estimate in f10.
//
// NOTE(review): no explicit zero-divisor check is visible here; when p6
// is clear, every refinement step is skipped and the conversion below
// operates directly on whatever frcpa left in f10.
13290075Sobrien
13390075Sobrien	.text
13490075Sobrien	.align 16
13590075Sobrien	.global __divdi3
13690075Sobrien	.proc __divdi3
13790075Sobrien__divdi3:
13890075Sobrien	.regstk 2,0,0,0
13990075Sobrien	// Transfer inputs to FP registers.
14090075Sobrien	setf.sig f8 = in0
14190075Sobrien	setf.sig f9 = in1
14290075Sobrien	;;
14390075Sobrien	// Convert the inputs to FP, so that they won't be treated as unsigned.
14490075Sobrien	fcvt.xf f8 = f8
14590075Sobrien	fcvt.xf f9 = f9
14690075Sobrien	;;
14790075Sobrien	// Compute the reciprocal approximation.
14890075Sobrien	frcpa.s1 f10, p6 = f8, f9
14990075Sobrien	;;
15090075Sobrien	// 3 Newton-Raphson iterations.
	// f11 = e = 1 - b*y ; f12 = q = a*y
15190075Sobrien(p6)	fnma.s1 f11 = f9, f10, f1
15290075Sobrien(p6)	fmpy.s1 f12 = f8, f10
15390075Sobrien	;;
	// f13 = e^2 ; q = q + e*q
15490075Sobrien(p6)	fmpy.s1 f13 = f11, f11
15590075Sobrien(p6)	fma.s1 f12 = f11, f12, f12
15690075Sobrien	;;
	// y = y + e*y ; f11 = q + e^2*q
15790075Sobrien(p6)	fma.s1 f10 = f11, f10, f10
15890075Sobrien(p6)	fma.s1 f11 = f13, f12, f12
15990075Sobrien	;;
	// y = y + e^2*y ; f12 = r = a - b*f11  (remainder)
16090075Sobrien(p6)	fma.s1 f10 = f13, f10, f10
16190075Sobrien(p6)	fnma.s1 f12 = f9, f11, f8
16290075Sobrien	;;
	// f10 = f11 + r*y  (final quotient estimate)
16390075Sobrien(p6)	fma.s1 f10 = f12, f10, f11
16490075Sobrien	;;
16590075Sobrien	// Convert the quotient to a signed integer (truncating conversion).
16690075Sobrien	fcvt.fx.trunc.s1 f10 = f10
16790075Sobrien	;;
16890075Sobrien	// Transfer result to GP registers.
16990075Sobrien	getf.sig ret0 = f10
17090075Sobrien	br.ret.sptk rp
17190075Sobrien	;;
17290075Sobrien	.endp __divdi3
17390075Sobrien#endif
17490075Sobrien
17590075Sobrien#ifdef L__moddi3
17690075Sobrien// Compute a 64-bit integer modulus.
17790075Sobrien//
17890075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
17990075Sobrien// alternative.
18090075Sobrien//
18190075Sobrien// in0 holds the dividend (a).  in1 holds the divisor (b).
//
// The remainder a - q*b is returned in ret0, where q is the truncated
// quotient computed as in __divdi3.  Registers written along the way:
// f8-f14, p6, and in1 (which is negated in place).  y below is the
// reciprocal estimate in f10.
18290075Sobrien
18390075Sobrien	.text
18490075Sobrien	.align 16
18590075Sobrien	.global __moddi3
18690075Sobrien	.proc __moddi3
18790075Sobrien__moddi3:
18890075Sobrien	.regstk 2,0,0,0
18990075Sobrien	// Transfer inputs to FP registers.
	// f14 keeps the integer image of a for the final xma.
19090075Sobrien	setf.sig f14 = in0
19190075Sobrien	setf.sig f9 = in1
19290075Sobrien	;;
19390075Sobrien	// Convert the inputs to FP, so that they won't be treated as unsigned.
19490075Sobrien	fcvt.xf f8 = f14
19590075Sobrien	fcvt.xf f9 = f9
19690075Sobrien	;;
19790075Sobrien	// Compute the reciprocal approximation.
19890075Sobrien	frcpa.s1 f10, p6 = f8, f9
19990075Sobrien	;;
20090075Sobrien	// 3 Newton-Raphson iterations.
	// f12 = q = a*y ; f11 = e = 1 - b*y
20190075Sobrien(p6)	fmpy.s1 f12 = f8, f10
20290075Sobrien(p6)	fnma.s1 f11 = f9, f10, f1
20390075Sobrien	;;
	// q = q + e*q ; f13 = e^2
20490075Sobrien(p6)	fma.s1 f12 = f11, f12, f12
20590075Sobrien(p6)	fmpy.s1 f13 = f11, f11
20690075Sobrien	;;
	// y = y + e*y ; f11 = q + e^2*q
20790075Sobrien(p6)	fma.s1 f10 = f11, f10, f10
20890075Sobrien(p6)	fma.s1 f11 = f13, f12, f12
20990075Sobrien	;;
	// in1 = -b (integer), needed for the final multiply-add.
21090075Sobrien	sub in1 = r0, in1
	// y = y + e^2*y ; f12 = r = a - b*f11
21190075Sobrien(p6)	fma.s1 f10 = f13, f10, f10
21290075Sobrien(p6)	fnma.s1 f12 = f9, f11, f8
21390075Sobrien	;;
	// f9 now holds the integer image of -b; the FP copy of b is no
	// longer read after this point.
21490075Sobrien	setf.sig f9 = in1
	// f10 = f11 + r*y  (final quotient estimate)
21590075Sobrien(p6)	fma.s1 f10 = f12, f10, f11
21690075Sobrien	;;
	// Convert the quotient to a signed integer (truncating conversion).
21790075Sobrien	fcvt.fx.trunc.s1 f10 = f10
21890075Sobrien	;;
21990075Sobrien	// r = q * (-b) + a
22090075Sobrien	xma.l f10 = f10, f9, f14
22190075Sobrien	;;
22290075Sobrien	// Transfer result to GP registers.
22390075Sobrien	getf.sig ret0 = f10
22490075Sobrien	br.ret.sptk rp
22590075Sobrien	;;
22690075Sobrien	.endp __moddi3
22790075Sobrien#endif
22890075Sobrien
22990075Sobrien#ifdef L__udivdi3
23090075Sobrien// Compute a 64-bit unsigned integer quotient.
23190075Sobrien//
23290075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
23390075Sobrien// alternative.
23490075Sobrien//
23590075Sobrien// in0 holds the dividend.  in1 holds the divisor.
//
// The quotient is returned in ret0.  Registers written along the way:
// f8-f13 and p6.  In the comments below a and b are the converted
// dividend and divisor (f8, f9) and y is the reciprocal estimate in f10.
//
// NOTE(review): no explicit zero-divisor check is visible here; when p6
// is clear the refinement steps are skipped and the conversion below
// operates directly on whatever frcpa left in f10.
23690075Sobrien
23790075Sobrien	.text
23890075Sobrien	.align 16
23990075Sobrien	.global __udivdi3
24090075Sobrien	.proc __udivdi3
24190075Sobrien__udivdi3:
24290075Sobrien	.regstk 2,0,0,0
24390075Sobrien	// Transfer inputs to FP registers.
24490075Sobrien	setf.sig f8 = in0
24590075Sobrien	setf.sig f9 = in1
24690075Sobrien	;;
24790075Sobrien	// Convert the inputs to FP, to avoid FP software-assist faults.
24890075Sobrien	fcvt.xuf.s1 f8 = f8
24990075Sobrien	fcvt.xuf.s1 f9 = f9
25090075Sobrien	;;
25190075Sobrien	// Compute the reciprocal approximation.
25290075Sobrien	frcpa.s1 f10, p6 = f8, f9
25390075Sobrien	;;
25490075Sobrien	// 3 Newton-Raphson iterations.
	// f11 = e = 1 - b*y ; f12 = q = a*y
25590075Sobrien(p6)	fnma.s1 f11 = f9, f10, f1
25690075Sobrien(p6)	fmpy.s1 f12 = f8, f10
25790075Sobrien	;;
	// f13 = e^2 ; q = q + e*q
25890075Sobrien(p6)	fmpy.s1 f13 = f11, f11
25990075Sobrien(p6)	fma.s1 f12 = f11, f12, f12
26090075Sobrien	;;
	// y = y + e*y ; f11 = q + e^2*q
26190075Sobrien(p6)	fma.s1 f10 = f11, f10, f10
26290075Sobrien(p6)	fma.s1 f11 = f13, f12, f12
26390075Sobrien	;;
	// y = y + e^2*y ; f12 = r = a - b*f11  (remainder)
26490075Sobrien(p6)	fma.s1 f10 = f13, f10, f10
26590075Sobrien(p6)	fnma.s1 f12 = f9, f11, f8
26690075Sobrien	;;
	// f10 = f11 + r*y  (final quotient estimate)
26790075Sobrien(p6)	fma.s1 f10 = f12, f10, f11
26890075Sobrien	;;
26990075Sobrien	// Convert the quotient to an unsigned integer (truncating conversion).
27090075Sobrien	fcvt.fxu.trunc.s1 f10 = f10
27190075Sobrien	;;
27290075Sobrien	// Transfer result to GP registers.
27390075Sobrien	getf.sig ret0 = f10
27490075Sobrien	br.ret.sptk rp
27590075Sobrien	;;
27690075Sobrien	.endp __udivdi3
27790075Sobrien#endif
27890075Sobrien
27990075Sobrien#ifdef L__umoddi3
28090075Sobrien// Compute a 64-bit unsigned integer modulus.
28190075Sobrien//
28290075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
28390075Sobrien// alternative.
28490075Sobrien//
28590075Sobrien// in0 holds the dividend (a).  in1 holds the divisor (b).
//
// The remainder a - q*b is returned in ret0, where q is the truncated
// unsigned quotient computed as in __udivdi3.  Registers written along
// the way: f8-f14, p6, and in1 (which is negated in place).  y below is
// the reciprocal estimate in f10.
28690075Sobrien
28790075Sobrien	.text
28890075Sobrien	.align 16
28990075Sobrien	.global __umoddi3
29090075Sobrien	.proc __umoddi3
29190075Sobrien__umoddi3:
29290075Sobrien	.regstk 2,0,0,0
29390075Sobrien	// Transfer inputs to FP registers.
	// f14 keeps the integer image of a for the final xma.
29490075Sobrien	setf.sig f14 = in0
29590075Sobrien	setf.sig f9 = in1
29690075Sobrien	;;
29790075Sobrien	// Convert the inputs to FP, to avoid FP software assist faults.
29890075Sobrien	fcvt.xuf.s1 f8 = f14
29990075Sobrien	fcvt.xuf.s1 f9 = f9
30090075Sobrien	;;
30190075Sobrien	// Compute the reciprocal approximation.
30290075Sobrien	frcpa.s1 f10, p6 = f8, f9
30390075Sobrien	;;
30490075Sobrien	// 3 Newton-Raphson iterations.
	// f12 = q = a*y ; f11 = e = 1 - b*y
30590075Sobrien(p6)	fmpy.s1 f12 = f8, f10
30690075Sobrien(p6)	fnma.s1 f11 = f9, f10, f1
30790075Sobrien	;;
	// q = q + e*q ; f13 = e^2
30890075Sobrien(p6)	fma.s1 f12 = f11, f12, f12
30990075Sobrien(p6)	fmpy.s1 f13 = f11, f11
31090075Sobrien	;;
	// y = y + e*y ; f11 = q + e^2*q
31190075Sobrien(p6)	fma.s1 f10 = f11, f10, f10
31290075Sobrien(p6)	fma.s1 f11 = f13, f12, f12
31390075Sobrien	;;
	// in1 = -b (integer), needed for the final multiply-add.
31490075Sobrien	sub in1 = r0, in1
	// y = y + e^2*y ; f12 = r = a - b*f11
31590075Sobrien(p6)	fma.s1 f10 = f13, f10, f10
31690075Sobrien(p6)	fnma.s1 f12 = f9, f11, f8
31790075Sobrien	;;
	// f9 now holds the integer image of -b; the FP copy of b is no
	// longer read after this point.
31890075Sobrien	setf.sig f9 = in1
	// f10 = f11 + r*y  (final quotient estimate)
31990075Sobrien(p6)	fma.s1 f10 = f12, f10, f11
32090075Sobrien	;;
32190075Sobrien	// Convert the quotient to an unsigned integer (truncating conversion).
32290075Sobrien	fcvt.fxu.trunc.s1 f10 = f10
32390075Sobrien	;;
32490075Sobrien	// r = q * (-b) + a
32590075Sobrien	xma.l f10 = f10, f9, f14
32690075Sobrien	;;
32790075Sobrien	// Transfer result to GP registers.
32890075Sobrien	getf.sig ret0 = f10
32990075Sobrien	br.ret.sptk rp
33090075Sobrien	;;
33190075Sobrien	.endp __umoddi3
33290075Sobrien#endif
33390075Sobrien
33490075Sobrien#ifdef L__divsi3
33590075Sobrien// Compute a 32-bit integer quotient.
33690075Sobrien//
33790075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
33890075Sobrien// alternative.
33990075Sobrien//
34090075Sobrien// in0 holds the dividend.  in1 holds the divisor.
//
// The quotient is returned in ret0.  Registers written along the way:
// in0, in1 (sign-extended in place), r2, f8-f11 and p6.  In the comments
// below a and b are the converted operands and y is the frcpa estimate.
//
// NOTE(review): no explicit zero-divisor check is visible here; when p6
// is clear the refinement steps are skipped and the conversion below
// operates directly on whatever frcpa left in f10.
34190075Sobrien
34290075Sobrien	.text
34390075Sobrien	.align 16
34490075Sobrien	.global __divsi3
34590075Sobrien	.proc __divsi3
34690075Sobrien__divsi3:
34790075Sobrien	.regstk 2,0,0,0
	// Sign-extend the low 32 bits of both operands to 64 bits.
34890075Sobrien	sxt4 in0 = in0
34990075Sobrien	sxt4 in1 = in1
35090075Sobrien	;;
	// Transfer inputs to FP registers.
35190075Sobrien	setf.sig f8 = in0
35290075Sobrien	setf.sig f9 = in1
35390075Sobrien	;;
	// r2 = exponent field for the small constant loaded into f11 below
	// (presumably 2^-34 given the usual 0xffff exponent bias -- TODO
	// confirm against the register-format description).
35490075Sobrien	mov r2 = 0x0ffdd
	// Convert both operands from signed integer to FP.
35590075Sobrien	fcvt.xf f8 = f8
35690075Sobrien	fcvt.xf f9 = f9
35790075Sobrien	;;
35890075Sobrien	setf.exp f11 = r2
	// f10 = reciprocal estimate y; p6 gates the refinement steps.
35990075Sobrien	frcpa.s1 f10, p6 = f8, f9
36090075Sobrien	;;
	// f8 = q = a*y ; f9 = e = 1 - b*y
36190075Sobrien(p6)	fmpy.s1 f8 = f8, f10
36290075Sobrien(p6)	fnma.s1 f9 = f9, f10, f1
36390075Sobrien	;;
	// q = q + e*q ; f9 = e^2 + f11  (squared error plus the small constant)
36490075Sobrien(p6)	fma.s1 f8 = f9, f8, f8
36590075Sobrien(p6)	fma.s1 f9 = f9, f9, f11
36690075Sobrien	;;
	// f10 = q + f9*q  (final quotient estimate)
36790075Sobrien(p6)	fma.s1 f10 = f9, f8, f8
36890075Sobrien	;;
	// Convert the quotient to a signed integer (truncating conversion).
36990075Sobrien	fcvt.fx.trunc.s1 f10 = f10
37090075Sobrien	;;
	// Transfer result to GP registers.
37190075Sobrien	getf.sig ret0 = f10
37290075Sobrien	br.ret.sptk rp
37390075Sobrien	;;
37490075Sobrien	.endp __divsi3
37590075Sobrien#endif
37690075Sobrien
37790075Sobrien#ifdef L__modsi3
37890075Sobrien// Compute a 32-bit integer modulus.
37990075Sobrien//
38090075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
38190075Sobrien// alternative.
38290075Sobrien//
38390075Sobrien// in0 holds the dividend.  in1 holds the divisor.
//
// The remainder a - q*b is returned in ret0, where q is the truncated
// quotient.  Registers written along the way: in0, in1 (sign-extended,
// in1 then negated), r2, f8-f13 and p6.  In the comments below a and b
// are the converted operands and y is the frcpa estimate.
38490075Sobrien
38590075Sobrien	.text
38690075Sobrien	.align 16
38790075Sobrien	.global __modsi3
38890075Sobrien	.proc __modsi3
38990075Sobrien__modsi3:
39090075Sobrien	.regstk 2,0,0,0
	// r2 = exponent field for the small constant loaded into f11 below
	// (presumably 2^-34 given the usual 0xffff exponent bias -- TODO
	// confirm against the register-format description).
39190075Sobrien	mov r2 = 0x0ffdd
	// Sign-extend the low 32 bits of both operands to 64 bits.
39290075Sobrien	sxt4 in0 = in0
39390075Sobrien	sxt4 in1 = in1
39490075Sobrien	;;
	// Transfer inputs to FP registers; f13 keeps the integer image of a
	// for the final xma.  NOTE(review): r32/r33 are used here where the
	// sibling routines write in0/in1 -- presumably the same registers
	// under their numeric names.
39590075Sobrien	setf.sig f13 = r32
39690075Sobrien	setf.sig f9 = r33
39790075Sobrien	;;
	// in1 = -b (integer), needed for the final multiply-add.
39890075Sobrien	sub in1 = r0, in1
	// Convert both operands from signed integer to FP.
39990075Sobrien	fcvt.xf f8 = f13
40090075Sobrien	fcvt.xf f9 = f9
40190075Sobrien	;;
40290075Sobrien	setf.exp f11 = r2
	// f10 = reciprocal estimate y; p6 gates the refinement steps.
40390075Sobrien	frcpa.s1 f10, p6 = f8, f9
40490075Sobrien	;;
	// f12 = q = a*y ; f10 = e = 1 - b*y  (e overwrites y)
40590075Sobrien(p6)	fmpy.s1 f12 = f8, f10
40690075Sobrien(p6)	fnma.s1 f10 = f9, f10, f1
40790075Sobrien	;;
	// f9 now holds the integer image of -b; the FP copy of b is no
	// longer read after this point.
40890075Sobrien	setf.sig f9 = in1
	// q = q + e*q ; f10 = e^2 + f11
40990075Sobrien(p6)	fma.s1 f12 = f10, f12, f12
41090075Sobrien(p6)	fma.s1 f10 = f10, f10, f11	
41190075Sobrien	;;
	// f10 = q + f10*q  (final quotient estimate)
41290075Sobrien(p6)	fma.s1 f10 = f10, f12, f12
41390075Sobrien	;;
	// Convert the quotient to a signed integer (truncating conversion).
41490075Sobrien	fcvt.fx.trunc.s1 f10 = f10
41590075Sobrien	;;
	// r = q * (-b) + a
41690075Sobrien	xma.l f10 = f10, f9, f13
41790075Sobrien	;;
	// Transfer result to GP registers.
41890075Sobrien	getf.sig ret0 = f10
41990075Sobrien	br.ret.sptk rp
42090075Sobrien	;;
42190075Sobrien	.endp __modsi3
42290075Sobrien#endif
42390075Sobrien
42490075Sobrien#ifdef L__udivsi3
42590075Sobrien// Compute a 32-bit unsigned integer quotient.
42690075Sobrien//
42790075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
42890075Sobrien// alternative.
42990075Sobrien//
43090075Sobrien// in0 holds the dividend.  in1 holds the divisor.
//
// The quotient is returned in ret0.  Registers written along the way:
// in0, in1 (zero-extended in place), r2, f8-f11 and p6.  In the comments
// below a and b are the converted operands and y is the frcpa estimate.
//
// NOTE(review): no explicit zero-divisor check is visible here; when p6
// is clear the refinement steps are skipped and the conversion below
// operates directly on whatever frcpa left in f10.
43190075Sobrien
43290075Sobrien	.text
43390075Sobrien	.align 16
43490075Sobrien	.global __udivsi3
43590075Sobrien	.proc __udivsi3
43690075Sobrien__udivsi3:
43790075Sobrien	.regstk 2,0,0,0
	// r2 = exponent field for the small constant loaded into f11 below
	// (presumably 2^-34 given the usual 0xffff exponent bias -- TODO
	// confirm against the register-format description).
43890075Sobrien	mov r2 = 0x0ffdd
	// Zero-extend the low 32 bits of both operands to 64 bits.
43990075Sobrien	zxt4 in0 = in0
44090075Sobrien	zxt4 in1 = in1
44190075Sobrien	;;
	// Transfer inputs to FP registers.
44290075Sobrien	setf.sig f8 = in0
44390075Sobrien	setf.sig f9 = in1
44490075Sobrien	;;
	// The signed conversion is used here; after zxt4 the 64-bit values
	// have their upper 32 bits clear, so they are non-negative.
44590075Sobrien	fcvt.xf f8 = f8
44690075Sobrien	fcvt.xf f9 = f9
44790075Sobrien	;;
44890075Sobrien	setf.exp f11 = r2
	// f10 = reciprocal estimate y; p6 gates the refinement steps.
44990075Sobrien	frcpa.s1 f10, p6 = f8, f9
45090075Sobrien	;;
	// f8 = q = a*y ; f9 = e = 1 - b*y
45190075Sobrien(p6)	fmpy.s1 f8 = f8, f10
45290075Sobrien(p6)	fnma.s1 f9 = f9, f10, f1
45390075Sobrien	;;
	// q = q + e*q ; f9 = e^2 + f11  (squared error plus the small constant)
45490075Sobrien(p6)	fma.s1 f8 = f9, f8, f8
45590075Sobrien(p6)	fma.s1 f9 = f9, f9, f11
45690075Sobrien	;;
	// f10 = q + f9*q  (final quotient estimate)
45790075Sobrien(p6)	fma.s1 f10 = f9, f8, f8
45890075Sobrien	;;
	// Convert the quotient to an unsigned integer (truncating conversion).
45990075Sobrien	fcvt.fxu.trunc.s1 f10 = f10
46090075Sobrien	;;
	// Transfer result to GP registers.
46190075Sobrien	getf.sig ret0 = f10
46290075Sobrien	br.ret.sptk rp
46390075Sobrien	;;
46490075Sobrien	.endp __udivsi3
46590075Sobrien#endif
46690075Sobrien
46790075Sobrien#ifdef L__umodsi3
46890075Sobrien// Compute a 32-bit unsigned integer modulus.
46990075Sobrien//
47090075Sobrien// From the Intel IA-64 Optimization Guide, choose the minimum latency
47190075Sobrien// alternative.
47290075Sobrien//
47390075Sobrien// in0 holds the dividend.  in1 holds the divisor.
//
// The remainder a - q*b is returned in ret0, where q is the truncated
// unsigned quotient.  Registers written along the way: in0, in1
// (zero-extended, in1 then negated), r2, f8-f13 and p6.  In the comments
// below a and b are the converted operands and y is the frcpa estimate.
47490075Sobrien
47590075Sobrien	.text
47690075Sobrien	.align 16
47790075Sobrien	.global __umodsi3
47890075Sobrien	.proc __umodsi3
47990075Sobrien__umodsi3:
48090075Sobrien	.regstk 2,0,0,0
	// r2 = exponent field for the small constant loaded into f11 below
	// (presumably 2^-34 given the usual 0xffff exponent bias -- TODO
	// confirm against the register-format description).
48190075Sobrien	mov r2 = 0x0ffdd
	// Zero-extend the low 32 bits of both operands to 64 bits.
48290075Sobrien	zxt4 in0 = in0
48390075Sobrien	zxt4 in1 = in1
48490075Sobrien	;;
	// Transfer inputs to FP registers; f13 keeps the integer image of a
	// for the final xma.
48590075Sobrien	setf.sig f13 = in0
48690075Sobrien	setf.sig f9 = in1
48790075Sobrien	;;
	// in1 = -b (integer), needed for the final multiply-add.
48890075Sobrien	sub in1 = r0, in1
	// The signed conversion is used here; after zxt4 the 64-bit values
	// have their upper 32 bits clear, so they are non-negative.
48990075Sobrien	fcvt.xf f8 = f13
49090075Sobrien	fcvt.xf f9 = f9
49190075Sobrien	;;
49290075Sobrien	setf.exp f11 = r2
	// f10 = reciprocal estimate y; p6 gates the refinement steps.
49390075Sobrien	frcpa.s1 f10, p6 = f8, f9
49490075Sobrien	;;
	// f12 = q = a*y ; f10 = e = 1 - b*y  (e overwrites y)
49590075Sobrien(p6)	fmpy.s1 f12 = f8, f10
49690075Sobrien(p6)	fnma.s1 f10 = f9, f10, f1
49790075Sobrien	;;
	// f9 now holds the integer image of -b; the FP copy of b is no
	// longer read after this point.
49890075Sobrien	setf.sig f9 = in1
	// q = q + e*q ; f10 = e^2 + f11
49990075Sobrien(p6)	fma.s1 f12 = f10, f12, f12
50090075Sobrien(p6)	fma.s1 f10 = f10, f10, f11
50190075Sobrien	;;
	// f10 = q + f10*q  (final quotient estimate)
50290075Sobrien(p6)	fma.s1 f10 = f10, f12, f12
50390075Sobrien	;;
	// Convert the quotient to an unsigned integer (truncating conversion).
50490075Sobrien	fcvt.fxu.trunc.s1 f10 = f10
50590075Sobrien	;;
	// r = q * (-b) + a
50690075Sobrien	xma.l f10 = f10, f9, f13
50790075Sobrien	;;
	// Transfer result to GP registers.
50890075Sobrien	getf.sig ret0 = f10
50990075Sobrien	br.ret.sptk rp
51090075Sobrien	;;
51190075Sobrien	.endp __umodsi3
51290075Sobrien#endif
51390075Sobrien
51490075Sobrien#ifdef L__save_stack_nonlocal
51590075Sobrien// Notes on save/restore stack nonlocal: We read ar.bsp but write
51690075Sobrien// ar.bspstore.  This is because ar.bsp can be read at all times
51790075Sobrien// (independent of the RSE mode) but since it's read-only we need to
51890075Sobrien// restore the value via ar.bspstore.  This is OK because
51990075Sobrien// ar.bsp==ar.bspstore after executing "flushrs".
52090075Sobrien
52190075Sobrien// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
//
// Layout written to save_area (four 8-byte slots, derived from the
// stores below):
//	save_area +  0: stack_pointer (in1)
//	save_area +  8: ar.bsp
//	save_area + 16: ar.rnat
//	save_area + 24: ar.pfs as captured by the alloc
// Scratch registers written: r2, r16-r19; in0 is advanced in place.
52290075Sobrien
52390075Sobrien	.text
52490075Sobrien	.align 16
52590075Sobrien	.global __ia64_save_stack_nonlocal
52690075Sobrien	.proc __ia64_save_stack_nonlocal
52790075Sobrien__ia64_save_stack_nonlocal:
52890075Sobrien	{ .mmf
	  // r18 = ar.pfs on entry; r19 = current ar.rsc.
52990075Sobrien	  alloc r18 = ar.pfs, 2, 0, 0, 0
53090075Sobrien	  mov r19 = ar.rsc
53190075Sobrien	  ;;
53290075Sobrien	}
53390075Sobrien	{ .mmi
53490075Sobrien	  flushrs
	  // save_area[0] = stack_pointer; in0 then points at save_area + 24.
53590075Sobrien	  st8 [in0] = in1, 24
	  // Keep only bits 2-4 of ar.rsc; the two low bits (presumably the
	  // RSE mode field) end up zero.
53690075Sobrien	  and r19 = 0x1c, r19
53790075Sobrien	  ;;
53890075Sobrien	}
53990075Sobrien	{ .mmi
	  // save_area[24] = ar.pfs; in0 then points at save_area + 8.
54090075Sobrien	  st8 [in0] = r18, -16
	  // Install the masked ar.rsc while ar.bsp/ar.rnat are read.
54190075Sobrien	  mov ar.rsc = r19
	  // r19 = value written back to ar.rsc before returning (low two
	  // bits set again).
54290075Sobrien	  or r19 = 0x3, r19
54390075Sobrien	  ;;
54490075Sobrien	}
54590075Sobrien	{ .mmi
54690075Sobrien	  mov r16 = ar.bsp
54790075Sobrien	  mov r17 = ar.rnat
	  // r2 = save_area + 16.
54890075Sobrien	  adds r2 = 8, in0
54990075Sobrien	  ;;
55090075Sobrien	}
55190075Sobrien	{ .mmi
	  // save_area[8] = ar.bsp; save_area[16] = ar.rnat.
55290075Sobrien	  st8 [in0] = r16
55390075Sobrien	  st8 [r2] = r17
55490075Sobrien	}
55590075Sobrien	{ .mib
	  // NOTE(review): ar.rsc is rewritten as (old & 0x1c) | 3, so any
	  // other bits of the entry value are not carried over -- confirm
	  // that this is intended.
55690075Sobrien	  mov ar.rsc = r19
55790075Sobrien	  br.ret.sptk.few rp
55890075Sobrien	  ;;
55990075Sobrien	}
56090075Sobrien	.endp __ia64_save_stack_nonlocal
56190075Sobrien#endif
56290075Sobrien
56390075Sobrien#ifdef L__nonlocal_goto
56490075Sobrien// void __ia64_nonlocal_goto(void *target_label, void *save_area,
56590075Sobrien//			     void *static_chain);
//
// Reloads state from the four 8-byte slots of save_area and "returns"
// to target_label:
//	save_area +  0 -> r12
//	save_area +  8 -> ar.bspstore
//	save_area + 16 -> ar.rnat
//	save_area + 24 -> ar.pfs
// static_chain is handed to the target in r15.  Scratch registers
// written: r16-r20; in1 is advanced in place.
56690075Sobrien
56790075Sobrien	.text
56890075Sobrien	.align 16
56990075Sobrien	.global __ia64_nonlocal_goto
57090075Sobrien	.proc __ia64_nonlocal_goto
57190075Sobrien__ia64_nonlocal_goto:
57290075Sobrien	{ .mmi
57390075Sobrien	  alloc r20 = ar.pfs, 3, 0, 0, 0
	  // r12 = save_area[0]; in1 advances to the next slot.
57490075Sobrien	  ld8 r12 = [in1], 8
	  // rp = target_label, so the br.ret at .L0 transfers control there
	  // (.L0 is the tag operand of this hinted move).
57590075Sobrien	  mov.ret.sptk rp = in0, .L0
57690075Sobrien	  ;;
57790075Sobrien	}
57890075Sobrien	{ .mmf
	  // r16 = save_area[8]; r19 = current ar.rsc.
57990075Sobrien	  ld8 r16 = [in1], 8
58090075Sobrien	  mov r19 = ar.rsc
58190075Sobrien	  ;;
58290075Sobrien	}
58390075Sobrien	{ .mmi
58490075Sobrien	  flushrs
	  // r17 = save_area[16].
58590075Sobrien	  ld8 r17 = [in1], 8
	  // Keep only bits 2-4 of ar.rsc; the two low bits (presumably the
	  // RSE mode field) end up zero.
58690075Sobrien	  and r19 = 0x1c, r19
58790075Sobrien	  ;;
58890075Sobrien	}
58990075Sobrien	{ .mmi
	  // r18 = save_area[24].
59090075Sobrien	  ld8 r18 = [in1]
	  // Install the masked ar.rsc before ar.bspstore/ar.rnat are written.
59190075Sobrien	  mov ar.rsc = r19
	  // r19 = value written back to ar.rsc at .L0 (low two bits set).
59290075Sobrien	  or r19 = 0x3, r19
59390075Sobrien	  ;;
59490075Sobrien	}
59590075Sobrien	{ .mmi
59690075Sobrien	  mov ar.bspstore = r16
59790075Sobrien	  ;;
59890075Sobrien	  mov ar.rnat = r17
59990075Sobrien	  ;;
60090075Sobrien	}
60190075Sobrien	{ .mmi
60290075Sobrien	  loadrs
60390075Sobrien	  invala
	  // Pass static_chain to the target in r15.
60490075Sobrien	  mov r15 = in2
60590075Sobrien	  ;;
60690075Sobrien	}
60790075Sobrien.L0:	{ .mib
	  // Restore ar.rsc and the saved ar.pfs, then branch through rp.
60890075Sobrien	  mov ar.rsc = r19
60990075Sobrien	  mov ar.pfs = r18
61090075Sobrien	  br.ret.sptk.few rp
61190075Sobrien	  ;;
61290075Sobrien	}
61390075Sobrien	.endp __ia64_nonlocal_goto
61490075Sobrien#endif
61590075Sobrien
61690075Sobrien#ifdef L__restore_stack_nonlocal
61790075Sobrien// This is mostly the same as nonlocal_goto above.
61890075Sobrien// ??? This has not been tested yet.
61990075Sobrien
62090075Sobrien// void __ia64_restore_stack_nonlocal(void *save_area)
//
// Reloads state from the four 8-byte slots of save_area and returns
// through the caller's rp (rp itself is not modified here):
//	save_area +  0 -> r12
//	save_area +  8 -> ar.bspstore
//	save_area + 16 -> ar.rnat
//	save_area + 24 -> ar.pfs
// Scratch registers written: r16-r20; in0 is advanced in place.
//
// NOTE(review): the alloc below declares 4 input registers although the
// prototype takes a single argument and only in0 is read -- confirm.
// NOTE(review): the .L0 label is not referenced inside this procedure.
62190075Sobrien
62290075Sobrien	.text
62390075Sobrien	.align 16
62490075Sobrien	.global __ia64_restore_stack_nonlocal
62590075Sobrien	.proc __ia64_restore_stack_nonlocal
62690075Sobrien__ia64_restore_stack_nonlocal:
62790075Sobrien	{ .mmf
62890075Sobrien	  alloc r20 = ar.pfs, 4, 0, 0, 0
	  // r12 = save_area[0]; in0 advances to the next slot.
62990075Sobrien	  ld8 r12 = [in0], 8
63090075Sobrien	  ;;
63190075Sobrien	}
63290075Sobrien	{ .mmb
	  // r16 = save_area[8]; r19 = current ar.rsc.
63390075Sobrien	  ld8 r16=[in0], 8
63490075Sobrien	  mov r19 = ar.rsc
63590075Sobrien	  ;;
63690075Sobrien	}
63790075Sobrien	{ .mmi
63890075Sobrien	  flushrs
	  // r17 = save_area[16].
63990075Sobrien	  ld8 r17 = [in0], 8
	  // Keep only bits 2-4 of ar.rsc; the two low bits (presumably the
	  // RSE mode field) end up zero.
64090075Sobrien	  and r19 = 0x1c, r19
64190075Sobrien	  ;;
64290075Sobrien	}
64390075Sobrien	{ .mmf
	  // r18 = save_area[24].
64490075Sobrien	  ld8 r18 = [in0]
	  // Install the masked ar.rsc before ar.bspstore/ar.rnat are written.
64590075Sobrien	  mov ar.rsc = r19
64690075Sobrien	  ;;
64790075Sobrien	}
64890075Sobrien	{ .mmi
64990075Sobrien	  mov ar.bspstore = r16
65090075Sobrien	  ;;
65190075Sobrien	  mov ar.rnat = r17
	  // r19 = value written back to ar.rsc at .L0 (low two bits set).
65290075Sobrien	  or r19 = 0x3, r19
65390075Sobrien	  ;;
65490075Sobrien	}
65590075Sobrien	{ .mmf
65690075Sobrien	  loadrs
65790075Sobrien	  invala
65890075Sobrien	  ;;
65990075Sobrien	}
66090075Sobrien.L0:	{ .mib
	  // Restore ar.rsc and the saved ar.pfs, then return to the caller.
66190075Sobrien	  mov ar.rsc = r19
66290075Sobrien	  mov ar.pfs = r18
66390075Sobrien	  br.ret.sptk.few rp
66490075Sobrien	  ;;
66590075Sobrien	}
66690075Sobrien	.endp __ia64_restore_stack_nonlocal
66790075Sobrien#endif
66890075Sobrien
66990075Sobrien#ifdef L__trampoline
67090075Sobrien// Implement the nested function trampoline.  This is out of line
67190075Sobrien// so that we don't have to bother with flushing the icache, as
67290075Sobrien// well as making the on-stack trampoline smaller.
67390075Sobrien//
67490075Sobrien// The trampoline has the following form:
67590075Sobrien//
67690075Sobrien//		+-------------------+ >
67790075Sobrien//	TRAMP:	| __ia64_trampoline | |
67890075Sobrien//		+-------------------+  > fake function descriptor
67990075Sobrien//		| TRAMP+16          | |
68090075Sobrien//		+-------------------+ >
68190075Sobrien//		| target descriptor |
68290075Sobrien//		+-------------------+
68390075Sobrien//		| static link	    |
68490075Sobrien//		+-------------------+
//
// On entry r1 is expected to hold TRAMP+16, the second word of the fake
// descriptor above (presumably loaded into r1 by the indirect-call
// sequence -- confirm against the calling convention).  The code loads
// the static link into r15, the target's entry address into b6 and the
// second word of the target descriptor into r1, then branches to b6.
// Scratch registers written: r2, r3.
68590075Sobrien
68690075Sobrien	.text
68790075Sobrien	.align 16
68890075Sobrien	.global __ia64_trampoline
68990075Sobrien	.proc __ia64_trampoline
69090075Sobrien__ia64_trampoline:
69190075Sobrien	{ .mmi
	  // r2 = address of the target descriptor; r1 advances to the
	  // static-link slot.
69290075Sobrien	  ld8 r2 = [r1], 8
69390075Sobrien	  ;;
	  // r15 = static link.
69490075Sobrien	  ld8 r15 = [r1]
69590075Sobrien	}
69690075Sobrien	{ .mmi
	  // r3 = first word of the target descriptor (entry address).
69790075Sobrien	  ld8 r3 = [r2], 8
69890075Sobrien	  ;;
	  // r1 = second word of the target descriptor.
69990075Sobrien	  ld8 r1 = [r2]
70090075Sobrien	  mov b6 = r3
70190075Sobrien	}
70290075Sobrien	{ .bbb
	  // Jump to the target; rp is left untouched, so the target returns
	  // to the trampoline's caller.
70390075Sobrien	  br.sptk.many b6
70490075Sobrien	  ;;
70590075Sobrien	}
70690075Sobrien	.endp __ia64_trampoline
70790075Sobrien#endif
708132718Skan
709132718Skan#ifdef L__compat
710132718Skan// Thunks for backward compatibility.
711132718Skan
712132718Skan	.text
713132718Skan	.align 16
	// __fixtfti: compatibility entry point that branches straight to
	// __fixxfti without touching any register, so arguments and the
	// return address are passed through unchanged.
714132718Skan	.global __fixtfti
715132718Skan	.proc __fixtfti
716132718Skan__fixtfti:
717132718Skan	{ .bbb
718132718Skan	  br.sptk.many __fixxfti
719132718Skan	  ;;
720132718Skan	}
721132718Skan	.endp __fixtfti
722132718Skan
	// __fixunstfti: compatibility entry point that branches straight to
	// __fixunsxfti without touching any register, so arguments and the
	// return address are passed through unchanged.
723132718Skan	.align 16
724132718Skan	.global __fixunstfti
725132718Skan	.proc __fixunstfti
726132718Skan__fixunstfti:
727132718Skan	{ .bbb
728132718Skan	  br.sptk.many __fixunsxfti
729132718Skan	  ;;
730132718Skan	}
731132718Skan	.endp __fixunstfti
732132718Skan
	// __floattitf: compatibility entry point that branches straight to
	// __floattixf without touching any register, so arguments and the
	// return address are passed through unchanged.
733132718Skan	.align 16
734132718Skan	.global __floattitf
735132718Skan	.proc __floattitf
736132718Skan__floattitf:
737132718Skan	{ .bbb
738132718Skan	  br.sptk.many __floattixf
739132718Skan	  ;;
740132718Skan	}
741132718Skan	.endp __floattitf
742132718Skan
743132718Skan#endif
744