lib1funcs.asm revision 132718
#ifdef L__divxf3
// __divxf3 -- 80-bit IEEE double-extended division.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// In:       farg0 = dividend (a), farg1 = divisor (b)
// Out:      fret0 = quotient
// Scratch:  f10-f14, p6, p7
//
// __divtf3 is a second label on the same entry point, kept as an
// alternate symbol name for backward compatibility.

	.text
	.align 16
	.global __divxf3
	.global __divtf3
	.proc __divxf3
__divxf3:
__divtf3:
	cmp.eq p7, p0 = r0, r0			// p7 = 1
	frcpa.s0 f10, p6 = farg0, farg1		// f10 = y0, initial estimate of 1/b
	;;
(p6)	cmp.ne p7, p0 = r0, r0			// p6 set => clear p7, so p7 = !p6
	.pred.rel.mutex p6, p7			// assembler hint: p6 and p7 are exclusive
	// p6 path: refine the estimate.  f1 reads as 1.0 and f0 as 0.0.
(p6)	fnma.s1 f11 = farg1, f10, f1		// e0 = 1 - b*y0
(p6)	fmpy.s1 f12 = farg0, f10		// q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11			// e0^2
(p6)	fma.s1 f14 = f11, f11, f11		// e0 + e0^2
	;;
(p6)	fma.s1 f11 = f13, f13, f11		// e0 + e0^4
(p6)	fma.s1 f13 = f14, f10, f10		// y1 = y0 + y0*(e0 + e0^2)
	;;
(p6)	fma.s1 f10 = f13, f11, f10		// y2 = y0 + y1*(e0 + e0^4)
(p6)	fnma.s1 f11 = farg1, f12, farg0		// r0 = a - b*q0
	;;
(p6)	fma.s1 f11 = f11, f10, f12		// q1 = q0 + r0*y2
(p6)	fnma.s1 f12 = farg1, f10, f1		// e2 = 1 - b*y2
	;;
(p6)	fma.s1 f10 = f12, f10, f10		// y3 = y2 + e2*y2
(p6)	fnma.s1 f12 = farg1, f11, farg0		// r1 = a - b*q1
	;;
(p6)	fma.s0 fret0 = f12, f10, f11		// result = q1 + r1*y3
(p7)	mov fret0 = f10				// p6 clear: return what frcpa left in f10
	br.ret.sptk rp
	.endp __divxf3
#endif

#ifdef L__divdf3
// __divdf3 -- 64-bit IEEE double division.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// farg0 holds the dividend.  farg1 holds the divisor.
//
// Register roles in the code below: f10 = reciprocal estimate y,
// f11 = quotient estimate q, f12/f13 = powers of the error term
// e = 1 - b*y0, f8 = final remainder.  frcpa sets p6 when the estimate
// has to be refined; p7 is arranged to be the complement of p6.

	.text
	.align 16
	.global __divdf3
	.proc __divdf3
__divdf3:
	cmp.eq p7, p0 = r0, r0			// p7 = 1
	frcpa.s0 f10, p6 = farg0, farg1		// f10 = y0, initial estimate of 1/b
	;;
(p6)	cmp.ne p7, p0 = r0, r0			// p6 set => clear p7, so p7 = !p6
	.pred.rel.mutex p6, p7			// assembler hint: p6 and p7 are exclusive
(p6)	fmpy.s1 f11 = farg0, f10		// q0 = a*y0
(p6)	fnma.s1 f12 = farg1, f10, f1		// e  = 1 - b*y0
	;;
(p6)	fma.s1 f11 = f12, f11, f11		// q1 = q0 + e*q0
(p6)	fmpy.s1 f13 = f12, f12			// e^2
	;;
(p6)	fma.s1 f10 = f12, f10, f10		// y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f11, f11		// q2 = q1 + e^2*q1
	;;
(p6)	fmpy.s1 f12 = f13, f13			// e^4
(p6)	fma.s1 f10 = f13, f10, f10		// y2 = y1 + e^2*y1
	;;
(p6)	fma.d.s1 f11 = f12, f11, f11		// q3 = q2 + e^4*q2, double precision
(p6)	fma.s1 f10 = f12, f10, f10		// y3 = y2 + e^4*y2
	;;
(p6)	fnma.d.s1 f8 = farg1, f11, farg0	// r = a - b*q3
	;;
(p6)	fma.d.s0 fret0 = f8, f10, f11		// result = q3 + r*y3, double precision
(p7)	mov fret0 = f10				// p6 clear: return what frcpa left in f10
	br.ret.sptk rp
	;;
	.endp __divdf3
#endif

#ifdef L__divsf3
// __divsf3 -- 32-bit IEEE float division.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// farg0 holds the dividend.  farg1 holds the divisor.
//
// Register roles in the code below: f10 = reciprocal estimate from
// frcpa, f8 = running quotient estimate, f9 = powers of the error term.
// frcpa sets p6 when the estimate has to be refined; p7 is arranged to
// be the complement of p6.

	.text
	.align 16
	.global __divsf3
	.proc __divsf3
__divsf3:
	cmp.eq p7, p0 = r0, r0			// p7 = 1
	frcpa.s0 f10, p6 = farg0, farg1		// f10 = y0, initial estimate of 1/b
	;;
(p6)	cmp.ne p7, p0 = r0, r0			// p6 set => clear p7, so p7 = !p6
	.pred.rel.mutex p6, p7			// assembler hint: p6 and p7 are exclusive
(p6)	fmpy.s1 f8 = farg0, f10			// q0 = a*y0
(p6)	fnma.s1 f9 = farg1, f10, f1		// e  = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8			// q1 = q0 + e*q0
(p6)	fmpy.s1 f9 = f9, f9			// e^2
	;;
(p6)	fma.s1 f8 = f9, f8, f8			// q2 = q1 + e^2*q1
(p6)	fmpy.s1 f9 = f9, f9			// e^4
	;;
(p6)	fma.d.s1 f10 = f9, f8, f8		// q3 = q2 + e^4*q2, double precision
	;;
(p6)	fnorm.s.s0 fret0 = f10			// final rounding to single precision
(p7)	mov fret0 = f10				// p6 clear: return what frcpa left in f10
	br.ret.sptk rp
	;;
	.endp __divsf3
#endif

#ifdef L__divdi3
// __divdi3 -- 64-bit signed integer division.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// In:       in0 = dividend (a), in1 = divisor (b)
// Out:      ret0 = quotient, truncated toward zero
// Scratch:  f8-f13, p6

	.text
	.align 16
	.global __divdi3
	.proc __divdi3
__divdi3:
	.regstk 2,0,0,0
	// Move both operands into FP registers as 64-bit integers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	// Signed integer -> floating point, so that negative inputs are
	// not treated as large unsigned values.
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
	;;
	// f10 = y0, initial estimate of 1/b; p6 set when it needs refining.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// Newton-Raphson refinement (three iterations).
(p6)	fnma.s1 f11 = f9, f10, f1		// e  = 1 - b*y0
(p6)	fmpy.s1 f12 = f8, f10			// q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11			// e^2
(p6)	fma.s1 f12 = f11, f12, f12		// q1 = q0 + e*q0
	;;
(p6)	fma.s1 f10 = f11, f10, f10		// y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12		// q2 = q1 + e^2*q1
	;;
(p6)	fma.s1 f10 = f13, f10, f10		// y2 = y1 + e^2*y1
(p6)	fnma.s1 f12 = f9, f11, f8		// r  = a - b*q2
	;;
(p6)	fma.s1 f10 = f12, f10, f11		// q3 = q2 + r*y2
	;;
	// Truncate the quotient to a signed integer.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// Move the integer result back to a general register.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divdi3
#endif

#ifdef L__moddi3
// __moddi3 -- 64-bit signed integer remainder.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// In:       in0 = dividend (a), in1 = divisor (b)
// Out:      ret0 = a - q*b, where q is the truncated quotient
// Scratch:  f8-f14, p6; in1 is overwritten with -b

	.text
	.align 16
	.global __moddi3
	.proc __moddi3
__moddi3:
	.regstk 2,0,0,0
	// Move both operands into FP registers as 64-bit integers.  The
	// integer form of a stays in f14 for the final multiply-add.
	setf.sig f14 = in0
	setf.sig f9 = in1
	;;
	// Signed integer -> floating point, so that negative inputs are
	// not treated as large unsigned values.
	fcvt.xf f8 = f14
	fcvt.xf f9 = f9
	;;
	// f10 = y0, initial estimate of 1/b; p6 set when it needs refining.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// Newton-Raphson refinement (three iterations).
(p6)	fmpy.s1 f12 = f8, f10			// q0 = a*y0
(p6)	fnma.s1 f11 = f9, f10, f1		// e  = 1 - b*y0
	;;
(p6)	fma.s1 f12 = f11, f12, f12		// q1 = q0 + e*q0
(p6)	fmpy.s1 f13 = f11, f11			// e^2
	;;
(p6)	fma.s1 f10 = f11, f10, f10		// y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12		// q2 = q1 + e^2*q1
	;;
	sub in1 = r0, in1			// in1 = -b
(p6)	fma.s1 f10 = f13, f10, f10		// y2 = y1 + e^2*y1
(p6)	fnma.s1 f12 = f9, f11, f8		// r  = a - b*q2
	;;
	setf.sig f9 = in1			// f9 = -b as an integer
(p6)	fma.s1 f10 = f12, f10, f11		// q3 = q2 + r*y2
	;;
	// Truncate the quotient to a signed integer.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// Integer multiply-add: remainder = q * (-b) + a
	xma.l f10 = f10, f9, f14
	;;
	// Move the integer result back to a general register.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __moddi3
#endif

#ifdef L__udivdi3
// __udivdi3 -- 64-bit unsigned integer division.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// In:       in0 = dividend (a), in1 = divisor (b)
// Out:      ret0 = quotient, truncated
// Scratch:  f8-f13, p6

	.text
	.align 16
	.global __udivdi3
	.proc __udivdi3
__udivdi3:
	.regstk 2,0,0,0
	// Move both operands into FP registers as 64-bit integers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	// Unsigned integer -> floating point; done up front to avoid FP
	// software-assist faults.
	fcvt.xuf.s1 f8 = f8
	fcvt.xuf.s1 f9 = f9
	;;
	// f10 = y0, initial estimate of 1/b; p6 set when it needs refining.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// Newton-Raphson refinement (three iterations).
(p6)	fnma.s1 f11 = f9, f10, f1		// e  = 1 - b*y0
(p6)	fmpy.s1 f12 = f8, f10			// q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11			// e^2
(p6)	fma.s1 f12 = f11, f12, f12		// q1 = q0 + e*q0
	;;
(p6)	fma.s1 f10 = f11, f10, f10		// y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12		// q2 = q1 + e^2*q1
	;;
(p6)	fma.s1 f10 = f13, f10, f10		// y2 = y1 + e^2*y1
(p6)	fnma.s1 f12 = f9, f11, f8		// r  = a - b*q2
	;;
(p6)	fma.s1 f10 = f12, f10, f11		// q3 = q2 + r*y2
	;;
	// Truncate the quotient to an unsigned integer.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// Move the integer result back to a general register.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivdi3
#endif

#ifdef L__umoddi3
// __umoddi3 -- 64-bit unsigned integer remainder.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// In:       in0 = dividend (a), in1 = divisor (b)
// Out:      ret0 = a - q*b, where q is the truncated quotient
// Scratch:  f8-f14, p6; in1 is overwritten with -b

	.text
	.align 16
	.global __umoddi3
	.proc __umoddi3
__umoddi3:
	.regstk 2,0,0,0
	// Move both operands into FP registers as 64-bit integers.  The
	// integer form of a stays in f14 for the final multiply-add.
	setf.sig f14 = in0
	setf.sig f9 = in1
	;;
	// Unsigned integer -> floating point; done up front to avoid FP
	// software-assist faults.
	fcvt.xuf.s1 f8 = f14
	fcvt.xuf.s1 f9 = f9
	;;
	// f10 = y0, initial estimate of 1/b; p6 set when it needs refining.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// Newton-Raphson refinement (three iterations).
(p6)	fmpy.s1 f12 = f8, f10			// q0 = a*y0
(p6)	fnma.s1 f11 = f9, f10, f1		// e  = 1 - b*y0
	;;
(p6)	fma.s1 f12 = f11, f12, f12		// q1 = q0 + e*q0
(p6)	fmpy.s1 f13 = f11, f11			// e^2
	;;
(p6)	fma.s1 f10 = f11, f10, f10		// y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12		// q2 = q1 + e^2*q1
	;;
	sub in1 = r0, in1			// in1 = -b
(p6)	fma.s1 f10 = f13, f10, f10		// y2 = y1 + e^2*y1
(p6)	fnma.s1 f12 = f9, f11, f8		// r  = a - b*q2
	;;
	setf.sig f9 = in1			// f9 = -b as an integer
(p6)	fma.s1 f10 = f12, f10, f11		// q3 = q2 + r*y2
	;;
	// Truncate the quotient to an unsigned integer.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// Integer multiply-add: remainder = q * (-b) + a
	xma.l f10 = f10, f9, f14
	;;
	// Move the integer result back to a general register.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umoddi3
#endif

#ifdef L__divsi3
// __divsi3 -- 32-bit signed integer division.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// Out: ret0 = quotient, truncated toward zero.
// Scratch: r2, f8-f11, p6; in0/in1 are sign-extended in place.

	.text
	.align 16
	.global __divsi3
	.proc __divsi3
__divsi3:
	.regstk 2,0,0,0
	// Sign-extend the 32-bit operands to 64 bits.
	sxt4 in0 = in0
	sxt4 in1 = in1
	;;
	// Move both operands into FP registers as 64-bit integers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	mov r2 = 0x0ffdd			// exponent field for the constant below
	fcvt.xf f8 = f8				// a as floating point
	fcvt.xf f9 = f9				// b as floating point
	;;
	setf.exp f11 = r2			// f11 = 2^-34 (exponent bias is 0xffff)
	frcpa.s1 f10, p6 = f8, f9		// f10 = y0, initial estimate of 1/b
	;;
(p6)	fmpy.s1 f8 = f8, f10			// q0 = a*y0
(p6)	fnma.s1 f9 = f9, f10, f1		// e  = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8			// q1 = q0 + e*q0
(p6)	fma.s1 f9 = f9, f9, f11			// e^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f9, f8, f8			// q2 = q1 + (e^2 + 2^-34)*q1
	;;
	// Truncate the quotient to a signed integer.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// Move the integer result back to a general register.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divsi3
#endif

#ifdef L__modsi3
// __modsi3 -- 32-bit signed integer remainder.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// Out: ret0 = a - q*b, where q is the truncated quotient.
// Scratch: r2, f8-f13, p6; in0 is sign-extended in place and in1 ends
// up holding -b.

	.text
	.align 16
	.global __modsi3
	.proc __modsi3
__modsi3:
	.regstk 2,0,0,0
	mov r2 = 0x0ffdd			// exponent field for the constant below
	// Sign-extend the 32-bit operands to 64 bits.
	sxt4 in0 = in0
	sxt4 in1 = in1
	;;
	// Move both operands into FP registers as 64-bit integers.  The
	// integer form of a stays in f13 for the final multiply-add.
	setf.sig f13 = in0
	setf.sig f9 = in1
	;;
	sub in1 = r0, in1			// in1 = -b
	fcvt.xf f8 = f13			// a as floating point
	fcvt.xf f9 = f9				// b as floating point
	;;
	setf.exp f11 = r2			// f11 = 2^-34 (exponent bias is 0xffff)
	frcpa.s1 f10, p6 = f8, f9		// f10 = y0, initial estimate of 1/b
	;;
(p6)	fmpy.s1 f12 = f8, f10			// q0 = a*y0
(p6)	fnma.s1 f10 = f9, f10, f1		// e  = 1 - b*y0
	;;
	setf.sig f9 = in1			// f9 = -b as an integer
(p6)	fma.s1 f12 = f10, f12, f12		// q1 = q0 + e*q0
(p6)	fma.s1 f10 = f10, f10, f11		// e^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f10, f12, f12		// q2 = q1 + (e^2 + 2^-34)*q1
	;;
	// Truncate the quotient to a signed integer.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// Integer multiply-add: remainder = q * (-b) + a
	xma.l f10 = f10, f9, f13
	;;
	// Move the integer result back to a general register.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __modsi3
#endif

#ifdef L__udivsi3
// __udivsi3 -- 32-bit unsigned integer division.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// Out: ret0 = quotient, truncated.
// Scratch: r2, f8-f11, p6; in0/in1 are zero-extended in place.

	.text
	.align 16
	.global __udivsi3
	.proc __udivsi3
__udivsi3:
	.regstk 2,0,0,0
	mov r2 = 0x0ffdd			// exponent field for the constant below
	// Zero-extend the 32-bit operands to 64 bits.
	zxt4 in0 = in0
	zxt4 in1 = in1
	;;
	// Move both operands into FP registers as 64-bit integers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	;;
	// Integer -> floating point.  The signed conversion is used here
	// on values that were zero-extended from 32 bits above.
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
	;;
	setf.exp f11 = r2			// f11 = 2^-34 (exponent bias is 0xffff)
	frcpa.s1 f10, p6 = f8, f9		// f10 = y0, initial estimate of 1/b
	;;
(p6)	fmpy.s1 f8 = f8, f10			// q0 = a*y0
(p6)	fnma.s1 f9 = f9, f10, f1		// e  = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8			// q1 = q0 + e*q0
(p6)	fma.s1 f9 = f9, f9, f11			// e^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f9, f8, f8			// q2 = q1 + (e^2 + 2^-34)*q1
	;;
	// Truncate the quotient to an unsigned integer.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// Move the integer result back to a general register.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivsi3
#endif

#ifdef L__umodsi3
// __umodsi3 -- 32-bit unsigned integer remainder.
//
// The instruction schedule is the minimum-latency alternative from the
// Intel IA-64 Optimization Guide.
//
// in0 holds the dividend.  in1 holds the divisor.
//
// Out: ret0 = a - q*b, where q is the truncated quotient.
// Scratch: r2, f8-f13, p6; in0 is zero-extended in place and in1 ends
// up holding -b.

	.text
	.align 16
	.global __umodsi3
	.proc __umodsi3
__umodsi3:
	.regstk 2,0,0,0
	mov r2 = 0x0ffdd			// exponent field for the constant below
	// Zero-extend the 32-bit operands to 64 bits.
	zxt4 in0 = in0
	zxt4 in1 = in1
	;;
	// Move both operands into FP registers as 64-bit integers.  The
	// integer form of a stays in f13 for the final multiply-add.
	setf.sig f13 = in0
	setf.sig f9 = in1
	;;
	sub in1 = r0, in1			// in1 = -b
	fcvt.xf f8 = f13			// a as floating point
	fcvt.xf f9 = f9				// b as floating point
	;;
	setf.exp f11 = r2			// f11 = 2^-34 (exponent bias is 0xffff)
	frcpa.s1 f10, p6 = f8, f9		// f10 = y0, initial estimate of 1/b
	;;
(p6)	fmpy.s1 f12 = f8, f10			// q0 = a*y0
(p6)	fnma.s1 f10 = f9, f10, f1		// e  = 1 - b*y0
	;;
	setf.sig f9 = in1			// f9 = -b as an integer
(p6)	fma.s1 f12 = f10, f12, f12		// q1 = q0 + e*q0
(p6)	fma.s1 f10 = f10, f10, f11		// e^2 + 2^-34
	;;
(p6)	fma.s1 f10 = f10, f12, f12		// q2 = q1 + (e^2 + 2^-34)*q1
	;;
	// Truncate the quotient to an unsigned integer.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// Integer multiply-add: remainder = q * (-b) + a
	xma.l f10 = f10, f9, f13
	;;
	// Move the integer result back to a general register.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umodsi3
#endif

#ifdef L__save_stack_nonlocal
// Notes on save/restore stack nonlocal: ar.bsp is what gets read, but
// ar.bspstore is what gets written.  ar.bsp can be read at all times
// (independent of the RSE mode), but it is read-only, so the value has
// to be restored through ar.bspstore.  This is OK because
// ar.bsp==ar.bspstore after executing "flushrs".
//
// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
//
// Save-area layout written below (8-byte slots):
//	save_area +  0	stack_pointer argument
//	save_area +  8	ar.bsp
//	save_area + 16	ar.rnat
//	save_area + 24	ar.pfs as returned by alloc
// Scratch: r2, r16-r19.  in0 is advanced while storing.

	.text
	.align 16
	.global __ia64_save_stack_nonlocal
	.proc __ia64_save_stack_nonlocal
__ia64_save_stack_nonlocal:
	{ .mmf
	  alloc r18 = ar.pfs, 2, 0, 0, 0	// r18 = ar.pfs
	  mov r19 = ar.rsc			// r19 = current RSE configuration
	  ;;
	}
	{ .mmi
	  flushrs				// after this ar.bsp == ar.bspstore
	  st8 [in0] = in1, 24			// slot +0 = stack pointer; in0 -> +24
	  and r19 = 0x1c, r19			// keep bits 2-4 only, clearing the mode bits
	  ;;
	}
	{ .mmi
	  st8 [in0] = r18, -16			// slot +24 = ar.pfs; in0 -> +8
	  mov ar.rsc = r19			// install the masked configuration
	  or r19 = 0x3, r19			// value to install on exit: mode bits = 3
	  ;;
	}
	{ .mmi
	  mov r16 = ar.bsp
	  mov r17 = ar.rnat
	  adds r2 = 8, in0			// r2 -> slot +16
	  ;;
	}
	{ .mmi
	  st8 [in0] = r16			// slot +8  = ar.bsp
	  st8 [r2] = r17			// slot +16 = ar.rnat
	}
	{ .mib
	  mov ar.rsc = r19			// RSE configuration with mode bits set again
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_save_stack_nonlocal
#endif

#ifdef L__nonlocal_goto
// void __ia64_nonlocal_goto(void *target_label, void *save_area,
//			     void *static_chain);
//
// Reloads r12, ar.bspstore, ar.rnat and ar.pfs from the four 8-byte
// slots of save_area (in that order), copies static_chain into r15 and
// returns to target_label.
// Scratch: r16-r20.  in1 is advanced while loading.

	.text
	.align 16
	.global __ia64_nonlocal_goto
	.proc __ia64_nonlocal_goto
__ia64_nonlocal_goto:
	{ .mmi
	  alloc r20 = ar.pfs, 3, 0, 0, 0
	  ld8 r12 = [in1], 8			// slot +0 -> r12
	  mov.ret.sptk rp = in0, .L0		// the final br.ret goes to target_label
	  ;;
	}
	{ .mmf
	  ld8 r16 = [in1], 8			// slot +8  (goes to ar.bspstore)
	  mov r19 = ar.rsc			// r19 = current RSE configuration
	  ;;
	}
	{ .mmi
	  flushrs
	  ld8 r17 = [in1], 8			// slot +16 (goes to ar.rnat)
	  and r19 = 0x1c, r19			// keep bits 2-4 only, clearing the mode bits
	  ;;
	}
	{ .mmi
	  ld8 r18 = [in1]			// slot +24 (goes to ar.pfs)
	  mov ar.rsc = r19			// install the masked configuration
	  or r19 = 0x3, r19			// value to install on exit: mode bits = 3
	  ;;
	}
	{ .mmi
	  mov ar.bspstore = r16
	  ;;
	  mov ar.rnat = r17
	  ;;
	}
	{ .mmi
	  loadrs
	  invala
	  mov r15 = in2				// r15 = static_chain
	  ;;
	}
.L0:	{ .mib
	  mov ar.rsc = r19			// RSE configuration with mode bits set again
	  mov ar.pfs = r18
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_nonlocal_goto
#endif

#ifdef L__restore_stack_nonlocal
// This is mostly the same as nonlocal_goto above.
// ??? This has not been tested yet.

// void __ia64_restore_stack_nonlocal(void *save_area)
//
// Reloads r12, ar.bspstore, ar.rnat and ar.pfs from the four 8-byte
// slots of save_area (in that order) and returns through rp.
// Scratch: r16-r20.  in0 is advanced while loading.
// NOTE(review): alloc declares 4 inputs although the prototype has one
// argument, and the .L0 label is not referenced in this routine.

	.text
	.align 16
	.global __ia64_restore_stack_nonlocal
	.proc __ia64_restore_stack_nonlocal
__ia64_restore_stack_nonlocal:
	{ .mmf
	  alloc r20 = ar.pfs, 4, 0, 0, 0
	  ld8 r12 = [in0], 8			// slot +0 -> r12
	  ;;
	}
	{ .mmb
	  ld8 r16=[in0], 8			// slot +8  (goes to ar.bspstore)
	  mov r19 = ar.rsc			// r19 = current RSE configuration
	  ;;
	}
	{ .mmi
	  flushrs
	  ld8 r17 = [in0], 8			// slot +16 (goes to ar.rnat)
	  and r19 = 0x1c, r19			// keep bits 2-4 only, clearing the mode bits
	  ;;
	}
	{ .mmf
	  ld8 r18 = [in0]			// slot +24 (goes to ar.pfs)
	  mov ar.rsc = r19			// install the masked configuration
	  ;;
	}
	{ .mmi
	  mov ar.bspstore = r16
	  ;;
	  mov ar.rnat = r17
	  or r19 = 0x3, r19			// value to install on exit: mode bits = 3
	  ;;
	}
	{ .mmf
	  loadrs
	  invala
	  ;;
	}
.L0:	{ .mib
	  mov ar.rsc = r19			// RSE configuration with mode bits set again
	  mov ar.pfs = r18
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_restore_stack_nonlocal
#endif

#ifdef L__trampoline
// Implement the nested function trampoline.  It lives out of line so
// that there is no need to flush the icache, and so that the on-stack
// trampoline stays small.
//
// The trampoline has the following form:
//
//		+-------------------+ >
//	TRAMP:	| __ia64_trampoline | |
//		+-------------------+  > fake function descriptor
//		| TRAMP+16          | |
//		+-------------------+ >
//		| target descriptor |
//		+-------------------+
//		| static link       |
//		+-------------------+
//
// On entry r1 is used as a pointer to the "target descriptor" slot,
// i.e. the TRAMP+16 value from the fake descriptor above.

	.text
	.align 16
	.global __ia64_trampoline
	.proc __ia64_trampoline
__ia64_trampoline:
	{ .mmi
	  ld8 r2 = [r1], 8			// r2 = target descriptor; r1 -> static link
	  ;;
	  ld8 r15 = [r1]			// r15 = static link
	}
	{ .mmi
	  ld8 r3 = [r2], 8			// r3 = first word of the target descriptor
	  ;;
	  ld8 r1 = [r2]				// r1 = second word of the target descriptor
	  mov b6 = r3
	}
	{ .bbb
	  br.sptk.many b6			// branch (no call) to the address in r3
	  ;;
	}
	.endp __ia64_trampoline
#endif

#ifdef L__compat
// Thunks for backward compatibility.
//
// Each thunk is a single branch bundle that transfers control from the
// old "tf" symbol name to the routine with the corresponding "xf" name.
// The transfer is a plain branch, not a call, so rp is left untouched.

	.text

	// __fixtfti -> __fixxfti
	.align 16
	.global __fixtfti
	.proc __fixtfti
__fixtfti:
	{ .bbb
	  br.sptk.many __fixxfti
	  ;;
	}
	.endp __fixtfti

	// __fixunstfti -> __fixunsxfti
	.align 16
	.global __fixunstfti
	.proc __fixunstfti
__fixunstfti:
	{ .bbb
	  br.sptk.many __fixunsxfti
	  ;;
	}
	.endp __fixunstfti

	// __floattitf -> __floattixf
	.align 16
	.global __floattitf
	.proc __floattitf
__floattitf:
	{ .bbb
	  br.sptk.many __floattixf
	  ;;
	}
	.endp __floattitf

#endif