/* Copyright (C) 2000, 2001, 2003, 2005 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com>.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING. If not, write to
   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA. */

/* As a special exception, if you link this library with other files,
   some of which are compiled with GCC, to produce an executable,
   this library does not by itself cause the resulting executable
   to be covered by the GNU General Public License.
   This exception does not however invalidate any other reasons why
   the executable file might be covered by the GNU General Public License. */

#ifdef L__divxf3
// Compute an 80-bit IEEE double-extended quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend. farg1 holds the divisor.
//
// __divtf3 is an alternate symbol name for backward compatibility.
//
// Notation in the comments below: a = farg0, b = farg1, y = reciprocal
// estimate, q = quotient estimate, e = error term, r = remainder.
// f0 and f1 read as the constants 0.0 and 1.0.

	.text
	.align 16
	.global __divxf3
	.global __divtf3
	.proc __divxf3
__divxf3:
__divtf3:
	// p7 starts out true and is cleared again if frcpa sets p6, so
	// exactly one of p6/p7 selects how fret0 is produced at the end.
	cmp.eq p7, p0 = r0, r0
	// f10 = y0; p6 is set when the refinement sequence must run.
	frcpa.s0 f10, p6 = farg0, farg1
	;;
(p6)	cmp.ne p7, p0 = r0, r0
	.pred.rel.mutex p6, p7
(p6)	fnma.s1 f11 = farg1, f10, f1	// e   = 1 - b*y0
(p6)	fma.s1 f12 = farg0, f10, f0	// q0  = a*y0
	;;
(p6)	fma.s1 f13 = f11, f11, f0	// f13 = e*e
(p6)	fma.s1 f14 = f11, f11, f11	// f14 = e*e + e
	;;
(p6)	fma.s1 f11 = f13, f13, f11	// f11 = (e*e)*(e*e) + e
(p6)	fma.s1 f13 = f14, f10, f10	// y1  = y0 + f14*y0
	;;
(p6)	fma.s1 f10 = f13, f11, f10	// y2  = y0 + y1*f11
(p6)	fnma.s1 f11 = farg1, f12, farg0	// r0  = a - b*q0
	;;
(p6)	fma.s1 f11 = f11, f10, f12	// q1  = q0 + r0*y2
(p6)	fnma.s1 f12 = farg1, f10, f1	// e2  = 1 - b*y2
	;;
(p6)	fma.s1 f10 = f12, f10, f10	// y3  = y2 + e2*y2
(p6)	fnma.s1 f12 = farg1, f11, farg0	// r1  = a - b*q1
	;;
	// Final step uses status field 0; the p7 path returns the frcpa
	// result unchanged.
(p6)	fma.s0 fret0 = f12, f10, f11	// q   = q1 + r1*y3
(p7)	mov fret0 = f10
	br.ret.sptk rp
	.endp __divxf3
#endif

#ifdef L__divdf3
// Compute a 64-bit IEEE double quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend. farg1 holds the divisor.
//
// Notation in the comments below: a = farg0, b = farg1, y = reciprocal
// estimate, q = quotient estimate, e = error term, r = remainder.
// f1 reads as the constant 1.0.

	.text
	.align 16
	.global __divdf3
	.proc __divdf3
__divdf3:
	// p7 starts out true and is cleared again if frcpa sets p6, so
	// exactly one of p6/p7 selects how fret0 is produced at the end.
	cmp.eq p7, p0 = r0, r0
	// f10 = y0; p6 is set when the refinement sequence must run.
	frcpa.s0 f10, p6 = farg0, farg1
	;;
(p6)	cmp.ne p7, p0 = r0, r0
	.pred.rel.mutex p6, p7
(p6)	fmpy.s1 f11 = farg0, f10	// q0 = a*y0
(p6)	fnma.s1 f12 = farg1, f10, f1	// e  = 1 - b*y0
	;;
(p6)	fma.s1 f11 = f12, f11, f11	// q1 = q0 + e*q0
(p6)	fmpy.s1 f13 = f12, f12		// e2 = e*e
	;;
(p6)	fma.s1 f10 = f12, f10, f10	// y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f11, f11	// q2 = q1 + e2*q1
	;;
(p6)	fmpy.s1 f12 = f13, f13		// e4 = e2*e2
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1 + e2*y1
	;;
(p6)	fma.d.s1 f11 = f12, f11, f11	// q3 = q2 + e4*q2
(p6)	fma.s1 f10 = f12, f10, f10	// y3 = y2 + e4*y2
	;;
	// f8 is reused as a temporary here; both source operands are read
	// by this same instruction.
(p6)	fnma.d.s1 f8 = farg1, f11, farg0	// r  = a - b*q3
	;;
(p6)	fma.d fret0 = f8, f10, f11	// q  = q3 + r*y3
(p7)	mov fret0 = f10
	br.ret.sptk rp
	;;
	.endp __divdf3
#endif

#ifdef L__divsf3
// Compute a 32-bit IEEE float quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// farg0 holds the dividend. farg1 holds the divisor.
//
// Notation in the comments below: a = dividend, b = divisor,
// y = reciprocal estimate, q = quotient estimate, e = error term,
// r = remainder. f1 reads as the constant 1.0.

	.text
	.align 16
	.global __divsf3
	.proc __divsf3
__divsf3:
	// p7 starts out true and is cleared again if frcpa sets p6, so
	// exactly one of p6/p7 selects how fret0 is produced at the end.
	cmp.eq p7, p0 = r0, r0
	// f10 = y0; p6 is set when the refinement sequence must run.
	frcpa.s0 f10, p6 = farg0, farg1
	;;
(p6)	cmp.ne p7, p0 = r0, r0
	.pred.rel.mutex p6, p7
	// f8 and f9 are overwritten in place from here on.
(p6)	fmpy.s1 f8 = farg0, f10		// q0 = a*y0
(p6)	fnma.s1 f9 = farg1, f10, f1	// e  = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q1 = q0 + e*q0
(p6)	fmpy.s1 f9 = f9, f9		// e2 = e*e
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q2 = q1 + e2*q1
(p6)	fmpy.s1 f9 = f9, f9		// e4 = e2*e2
	;;
(p6)	fma.d.s1 f10 = f9, f8, f8	// q3 = q2 + e4*q2
	;;
	// Round the refined quotient to single precision.
(p6)	fnorm.s.s0 fret0 = f10
(p7)	mov fret0 = f10
	br.ret.sptk rp
	;;
	.endp __divsf3
#endif

#ifdef L__divdi3
// Compute a 64-bit integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend. in1 holds the divisor.
// A zero divisor raises "break 1".

	.text
	.align 16
	.global __divdi3
	.proc __divdi3
__divdi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, so that they won't be treated as unsigned.
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fnma.s1 f11 = f9, f10, f1	// e  = 1 - b*y0
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11		// e2 = e*e
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0 + e*q0
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1 + e2*q1
	;;
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1 + e2*y1
(p6)	fnma.s1 f12 = f9, f11, f8	// r  = a - b*q2
	;;
(p6)	fma.s1 f10 = f12, f10, f11	// q  = q2 + r*y2
	;;
	// Round quotient to an integer.
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divdi3
#endif

#ifdef L__moddi3
// Compute a 64-bit integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend (a). in1 holds the divisor (b).
// A zero divisor raises "break 1".

	.text
	.align 16
	.global __moddi3
	.proc __moddi3
__moddi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers. f14 keeps the integer dividend
	// for the final multiply-add.
	setf.sig f14 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, so that they won't be treated as unsigned.
	fcvt.xf f8 = f14
	fcvt.xf f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f11 = f9, f10, f1	// e  = 1 - b*y0
	;;
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0 + e*q0
(p6)	fmpy.s1 f13 = f11, f11		// e2 = e*e
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1 + e2*q1
	;;
	sub in1 = r0, in1		// in1 = -b
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1 + e2*y1
(p6)	fnma.s1 f12 = f9, f11, f8	// r  = a - b*q2
	;;
	// The FP copy of b is no longer needed; f9 now holds integer -b.
	setf.sig f9 = in1
(p6)	fma.s1 f10 = f12, f10, f11	// q  = q2 + r*y2
	;;
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f14
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __moddi3
#endif

#ifdef L__udivdi3
// Compute a 64-bit unsigned integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend. in1 holds the divisor.
// A zero divisor raises "break 1".

	.text
	.align 16
	.global __udivdi3
	.proc __udivdi3
__udivdi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers.
	setf.sig f8 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, to avoid FP software-assist faults.
	fcvt.xuf.s1 f8 = f8
	fcvt.xuf.s1 f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fnma.s1 f11 = f9, f10, f1	// e  = 1 - b*y0
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
	;;
(p6)	fmpy.s1 f13 = f11, f11		// e2 = e*e
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0 + e*q0
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1 + e2*q1
	;;
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1 + e2*y1
(p6)	fnma.s1 f12 = f9, f11, f8	// r  = a - b*q2
	;;
(p6)	fma.s1 f10 = f12, f10, f11	// q  = q2 + r*y2
	;;
	// Round quotient to an unsigned integer.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivdi3
#endif

#ifdef L__umoddi3
// Compute a 64-bit unsigned integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend (a). in1 holds the divisor (b).
// A zero divisor raises "break 1".

	.text
	.align 16
	.global __umoddi3
	.proc __umoddi3
__umoddi3:
	.regstk 2,0,0,0
	// Transfer inputs to FP registers. f14 keeps the integer dividend
	// for the final multiply-add.
	setf.sig f14 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0).
	cmp.ne.unc p0,p7=0,in1
	;;
	// Convert the inputs to FP, to avoid FP software assist faults.
	fcvt.xuf.s1 f8 = f14
	fcvt.xuf.s1 f9 = f9
(p7)	break 1
	;;
	// Compute the reciprocal approximation.
	frcpa.s1 f10, p6 = f8, f9
	;;
	// 3 Newton-Raphson iterations.
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f11 = f9, f10, f1	// e  = 1 - b*y0
	;;
(p6)	fma.s1 f12 = f11, f12, f12	// q1 = q0 + e*q0
(p6)	fmpy.s1 f13 = f11, f11		// e2 = e*e
	;;
(p6)	fma.s1 f10 = f11, f10, f10	// y1 = y0 + e*y0
(p6)	fma.s1 f11 = f13, f12, f12	// q2 = q1 + e2*q1
	;;
	sub in1 = r0, in1		// in1 = -b
(p6)	fma.s1 f10 = f13, f10, f10	// y2 = y1 + e2*y1
(p6)	fnma.s1 f12 = f9, f11, f8	// r  = a - b*q2
	;;
	// The FP copy of b is no longer needed; f9 now holds integer -b.
	setf.sig f9 = in1
(p6)	fma.s1 f10 = f12, f10, f11	// q  = q2 + r*y2
	;;
	// Round quotient to an unsigned integer.
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f14
	;;
	// Transfer result to GP registers.
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umoddi3
#endif

#ifdef L__divsi3
// Compute a 32-bit integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend. in1 holds the divisor.
// A zero divisor raises "break 1".

	.text
	.align 16
	.global __divsi3
	.proc __divsi3
__divsi3:
	.regstk 2,0,0,0
	// Exponent pattern for the correction constant loaded into f11 below.
	mov r2 = 0x0ffdd
	// Only the low 32 bits of the inputs are meaningful; sign-extend
	// them before any use.
	sxt4 in0 = in0
	sxt4 in1 = in1
	;;
	setf.sig f8 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0). This must test the
	// sign-extended divisor, so it sits after the stop that follows sxt4;
	// testing the raw register would miss a zero divisor whose upper
	// 32 bits are not clear.
	cmp.ne.unc p0,p7=0,in1
	;;
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
(p7)	break 1
	;;
	// f11 = 2^-34, assuming the register-format exponent bias of 0xffff.
	setf.exp f11 = r2
	frcpa.s1 f10, p6 = f8, f9
	;;
(p6)	fmpy.s1 f8 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f9 = f9, f10, f1	// e  = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q1 = q0 + e*q0
(p6)	fma.s1 f9 = f9, f9, f11		// e1 = e*e + f11
	;;
(p6)	fma.s1 f10 = f9, f8, f8		// q2 = q1 + e1*q1
	;;
	fcvt.fx.trunc.s1 f10 = f10
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __divsi3
#endif

#ifdef L__modsi3
// Compute a 32-bit integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend. in1 holds the divisor.
// A zero divisor raises "break 1".

	.text
	.align 16
	.global __modsi3
	.proc __modsi3
__modsi3:
	.regstk 2,0,0,0
	// Exponent pattern for the correction constant loaded into f11 below.
	mov r2 = 0x0ffdd
	sxt4 in0 = in0
	sxt4 in1 = in1
	;;
	// f13 keeps the integer dividend for the final multiply-add.
	setf.sig f13 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0), tested after sign extension.
	cmp.ne.unc p0,p7=0,in1
	;;
	sub in1 = r0, in1		// in1 = -b
	fcvt.xf f8 = f13
	fcvt.xf f9 = f9
	;;
	// f11 = 2^-34, assuming the register-format exponent bias of 0xffff.
	setf.exp f11 = r2
	frcpa.s1 f10, p6 = f8, f9
(p7)	break 1
	;;
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f10 = f9, f10, f1	// e  = 1 - b*y0
	;;
	// The FP copy of b is no longer needed; f9 now holds integer -b.
	setf.sig f9 = in1
(p6)	fma.s1 f12 = f10, f12, f12	// q1 = q0 + e*q0
(p6)	fma.s1 f10 = f10, f10, f11	// e1 = e*e + f11
	;;
(p6)	fma.s1 f10 = f10, f12, f12	// q2 = q1 + e1*q1
	;;
	fcvt.fx.trunc.s1 f10 = f10
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f13
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __modsi3
#endif

#ifdef L__udivsi3
// Compute a 32-bit unsigned integer quotient.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend. in1 holds the divisor.
// A zero divisor raises "break 1".

	.text
	.align 16
	.global __udivsi3
	.proc __udivsi3
__udivsi3:
	.regstk 2,0,0,0
	// Exponent pattern for the correction constant loaded into f11 below.
	mov r2 = 0x0ffdd
	zxt4 in0 = in0
	zxt4 in1 = in1
	;;
	setf.sig f8 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0), tested after zero extension.
	cmp.ne.unc p0,p7=0,in1
	;;
	// After zxt4 the operands are non-negative as 64-bit signed values,
	// so the signed conversion is used.
	fcvt.xf f8 = f8
	fcvt.xf f9 = f9
(p7)	break 1
	;;
	// f11 = 2^-34, assuming the register-format exponent bias of 0xffff.
	setf.exp f11 = r2
	frcpa.s1 f10, p6 = f8, f9
	;;
(p6)	fmpy.s1 f8 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f9 = f9, f10, f1	// e  = 1 - b*y0
	;;
(p6)	fma.s1 f8 = f9, f8, f8		// q1 = q0 + e*q0
(p6)	fma.s1 f9 = f9, f9, f11		// e1 = e*e + f11
	;;
(p6)	fma.s1 f10 = f9, f8, f8		// q2 = q1 + e1*q1
	;;
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __udivsi3
#endif

#ifdef L__umodsi3
// Compute a 32-bit unsigned integer modulus.
//
// From the Intel IA-64 Optimization Guide, choose the minimum latency
// alternative.
//
// in0 holds the dividend. in1 holds the divisor.
// A zero divisor raises "break 1".

	.text
	.align 16
	.global __umodsi3
	.proc __umodsi3
__umodsi3:
	.regstk 2,0,0,0
	// Exponent pattern for the correction constant loaded into f11 below.
	mov r2 = 0x0ffdd
	zxt4 in0 = in0
	zxt4 in1 = in1
	;;
	// f13 keeps the integer dividend for the final multiply-add.
	setf.sig f13 = in0
	setf.sig f9 = in1
	// Check divide by zero: p7 = (in1 == 0), tested after zero extension.
	cmp.ne.unc p0,p7=0,in1
	;;
	sub in1 = r0, in1		// in1 = -b
	fcvt.xf f8 = f13
	fcvt.xf f9 = f9
	;;
	// f11 = 2^-34, assuming the register-format exponent bias of 0xffff.
	setf.exp f11 = r2
	frcpa.s1 f10, p6 = f8, f9
(p7)	break 1
	;;
(p6)	fmpy.s1 f12 = f8, f10		// q0 = a*y0
(p6)	fnma.s1 f10 = f9, f10, f1	// e  = 1 - b*y0
	;;
	// The FP copy of b is no longer needed; f9 now holds integer -b.
	setf.sig f9 = in1
(p6)	fma.s1 f12 = f10, f12, f12	// q1 = q0 + e*q0
(p6)	fma.s1 f10 = f10, f10, f11	// e1 = e*e + f11
	;;
(p6)	fma.s1 f10 = f10, f12, f12	// q2 = q1 + e1*q1
	;;
	fcvt.fxu.trunc.s1 f10 = f10
	;;
	// r = q * (-b) + a
	xma.l f10 = f10, f9, f13
	;;
	getf.sig ret0 = f10
	br.ret.sptk rp
	;;
	.endp __umodsi3
#endif

#ifdef L__save_stack_nonlocal
// Notes on save/restore stack nonlocal: We read ar.bsp but write
// ar.bspstore. This is because ar.bsp can be read at all times
// (independent of the RSE mode) but since it's read-only we need to
// restore the value via ar.bspstore. This is OK because
// ar.bsp==ar.bspstore after executing "flushrs".
//
// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)
//
// Layout written to save_area, as stored by the code below:
//	save_area +  0	stack_pointer argument
//	save_area +  8	ar.bsp
//	save_area + 16	ar.rnat
//	save_area + 24	ar.pfs as returned by alloc

	.text
	.align 16
	.global __ia64_save_stack_nonlocal
	.proc __ia64_save_stack_nonlocal
__ia64_save_stack_nonlocal:
	{ .mmf
	  alloc r18 = ar.pfs, 2, 0, 0, 0	// r18 = ar.pfs
	  mov r19 = ar.rsc
	  ;;
	}
	{ .mmi
	  flushrs
	  st8 [in0] = in1, 24		// +0: stack pointer; in0 -> +24
	  and r19 = 0x1c, r19		// keep only RSC bits 2..4
	  ;;
	}
	{ .mmi
	  st8 [in0] = r18, -16		// +24: ar.pfs; in0 -> +8
	  mov ar.rsc = r19		// RSC with its two low bits cleared
	  or r19 = 0x3, r19		// value written back before return
	  ;;
	}
	{ .mmi
	  mov r16 = ar.bsp
	  mov r17 = ar.rnat
	  adds r2 = 8, in0		// r2 -> +16
	  ;;
	}
	{ .mmi
	  st8 [in0] = r16		// +8: ar.bsp
	  st8 [r2] = r17		// +16: ar.rnat
	}
	{ .mib
	  mov ar.rsc = r19		// RSC with both low bits set
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_save_stack_nonlocal
#endif

#ifdef L__nonlocal_goto
// void __ia64_nonlocal_goto(void *target_label, void *save_area,
//			     void *static_chain);
//
// Reloads r12, ar.bspstore, ar.rnat and ar.pfs from the four consecutive
// 8-byte words at save_area, copies static_chain into r15, and returns
// to target_label.

	.text
	.align 16
	.global __ia64_nonlocal_goto
	.proc __ia64_nonlocal_goto
__ia64_nonlocal_goto:
	{ .mmi
	  alloc r20 = ar.pfs, 3, 0, 0, 0
	  ld8 r12 = [in1], 8		// word 0 -> r12
	  mov.ret.sptk rp = in0, .L0	// return address = target_label
	  ;;
	}
	{ .mmf
	  ld8 r16 = [in1], 8		// word 1, written to ar.bspstore
	  mov r19 = ar.rsc
	  ;;
	}
	{ .mmi
	  flushrs
	  ld8 r17 = [in1], 8		// word 2, written to ar.rnat
	  and r19 = 0x1c, r19		// keep only RSC bits 2..4
	  ;;
	}
	{ .mmi
	  ld8 r18 = [in1]		// word 3, written to ar.pfs
	  mov ar.rsc = r19		// RSC with its two low bits cleared
	  or r19 = 0x3, r19		// value written back before return
	  ;;
	}
	{ .mmi
	  mov ar.bspstore = r16
	  ;;
	  mov ar.rnat = r17
	  ;;
	}
	{ .mmi
	  loadrs
	  invala
	  mov r15 = in2			// r15 = static_chain
	  ;;
	}
.L0:	{ .mib
	  mov ar.rsc = r19		// RSC with both low bits set
	  mov ar.pfs = r18
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_nonlocal_goto
#endif

#ifdef L__restore_stack_nonlocal
// This is mostly the same as nonlocal_goto above.
// ??? This has not been tested yet.

// void __ia64_restore_stack_nonlocal(void *save_area)
//
// Reloads r12, ar.bspstore, ar.rnat and ar.pfs from the four consecutive
// 8-byte words at save_area, then returns through rp.

	.text
	.align 16
	.global __ia64_restore_stack_nonlocal
	.proc __ia64_restore_stack_nonlocal
__ia64_restore_stack_nonlocal:
	{ .mmf
	  alloc r20 = ar.pfs, 4, 0, 0, 0
	  ld8 r12 = [in0], 8		// word 0 -> r12
	  ;;
	}
	{ .mmb
	  ld8 r16=[in0], 8		// word 1, written to ar.bspstore
	  mov r19 = ar.rsc
	  ;;
	}
	{ .mmi
	  flushrs
	  ld8 r17 = [in0], 8		// word 2, written to ar.rnat
	  and r19 = 0x1c, r19		// keep only RSC bits 2..4
	  ;;
	}
	{ .mmf
	  ld8 r18 = [in0]		// word 3, written to ar.pfs
	  mov ar.rsc = r19		// RSC with its two low bits cleared
	  ;;
	}
	{ .mmi
	  mov ar.bspstore = r16
	  ;;
	  mov ar.rnat = r17
	  or r19 = 0x3, r19		// value written back before return
	  ;;
	}
	{ .mmf
	  loadrs
	  invala
	  ;;
	}
.L0:	{ .mib
	  mov ar.rsc = r19		// RSC with both low bits set
	  mov ar.pfs = r18
	  br.ret.sptk.few rp
	  ;;
	}
	.endp __ia64_restore_stack_nonlocal
#endif

#ifdef L__trampoline
// Implement the nested function trampoline. This is out of line
// so that we don't have to bother with flushing the icache, as
// well as making the on-stack trampoline smaller.
//
// The trampoline has the following form:
//
//		+-------------------+ >
//	TRAMP:	| __ia64_trampoline | |
//		+-------------------+  > fake function descriptor
//		| TRAMP+16          | |
//		+-------------------+ >
//		| target descriptor |
//		+-------------------+
//		| static link       |
//		+-------------------+
//
// On entry r1 is expected to hold TRAMP+16, the second word of the
// fake descriptor pictured above.

	.text
	.align 16
	.global __ia64_trampoline
	.proc __ia64_trampoline
__ia64_trampoline:
	{ .mmi
	  ld8 r2 = [r1], 8		// r2 = target descriptor; r1 += 8
	  ;;
	  ld8 r15 = [r1]		// r15 = static link
	}
	{ .mmi
	  ld8 r3 = [r2], 8		// r3 = first word of the target descriptor
	  ;;
	  ld8 r1 = [r2]			// r1 = second word of the target descriptor
	  mov b6 = r3
	}
	{ .bbb
	  br.sptk.many b6		// jump to the address loaded into r3
	  ;;
	}
	.endp __ia64_trampoline
#endif

// Thunks for backward compatibility.
#ifdef L_fixtfdi
// __fixtfti: old name, branches straight to __fixxfti.
	.text
	.align 16
	.global __fixtfti
	.proc __fixtfti
__fixtfti:
	{ .bbb
	  br.sptk.many __fixxfti
	  ;;
	}
	.endp __fixtfti
#endif
#ifdef L_fixunstfdi
// __fixunstfti: old name, branches straight to __fixunsxfti.
	.text
	.align 16
	.global __fixunstfti
	.proc __fixunstfti
__fixunstfti:
	{ .bbb
	  br.sptk.many __fixunsxfti
	  ;;
	}
	.endp __fixunstfti
#endif
// Guarded with #ifdef like every other section of this file, so the
// thunk is selected by the macro being defined, whatever its value.
#ifdef L_floatditf
// __floattitf: old name, branches straight to __floattixf.
	.text
	.align 16
	.global __floattitf
	.proc __floattitf
__floattitf:
	{ .bbb
	  br.sptk.many __floattixf
	  ;;
	}
	.endp __floattitf
#endif