//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Double Precision Divide
//
// __hexagon_divdf3 divides the double held in r1:0 (A) by the double held in
// r3:2 (B).  After entry the same register pairs are re-labelled REM (r1:0)
// and DENOM (r3:2), and the quotient is accumulated as a 64-bit integer in
// Q (r5:4).
//
// Outline of the code in this part of the file:
//   1. classify both operands; anything that is not "normal" branches to
//      .Ldiv_abnormal
//   2. build a single-precision reciprocal estimate of the denominator
//      mantissa and refine it twice
//   3. run four DIV_ITER1B steps, each producing a partial quotient from
//      RECIPEST * REMHI and subtracting (partial quotient * DENOM) from REM
//   4. apply one final remainder correction, fold "remainder != 0" into the
//      low bit of Q, and negate Q when the operand signs differ
//
// NOTE(review): every directive and instruction is placed on its own line.
// The preprocessor directives, the `//` comments and the `\` continuations
// of DIV_ITER1B are all line-sensitive, so several of them cannot share one
// physical line.

// ---- Register aliases ------------------------------------------------------

// Operand pairs as received on entry.
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2

// Integer quotient accumulator.
#define Q r5:4
#define QH r5
#define QL r4

// 64-bit product / scratch pair.
#define PROD r7:6
#define PRODHI r7
#define PRODLO r6

// Single-precision values used for the reciprocal estimate.
#define SFONE r8
#define SFDEN r9
#define SFERROR r10
#define SFRECIP r11

// Exponent fields of B (high register) and A (low register).
#define EXPBA r13:12
#define EXPB r13
#define EXPA r12

// High partial product subtracted from the remainder in DIV_ITER1B.
#define REMSUB2 r15:14

// XOR of the operand high words; only compared against #-1 below.
#define SIGN r28

// ---- Predicate aliases -----------------------------------------------------

#define Q_POSITIVE p3
#define NORMAL p2
#define NO_OVF_UNF p1
#define P_TMP p0

// ---- Constants -------------------------------------------------------------

// Bit offset at which the reciprocal mantissa is inserted into RECIPEST.
#define RECIPEST_SHIFT 3
// Amount subtracted from the exponent difference before it is applied.
#define QADJ 61

// Class masks passed to dfclass.
#define DFCLASS_NORMAL 0x02
#define DFCLASS_NUMBER 0x0F
#define DFCLASS_INFINITE 0x08
#define DFCLASS_ZERO 0x01
#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)

// Field widths and bias of the double / single formats.
#define DF_MANTBITS 52
#define DF_EXPBITS 11
#define SF_MANTBITS 23
#define SF_EXPBITS 8
#define DF_BIAS 0x3ff

// Not referenced in this part of the file.
#define SR_ROUND_OFF 22

// ---- Symbol alias helpers --------------------------------------------------

// Each helper exports an additional global name bound to __hexagon_<TAG>.
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
// Emits the ELF size of symbol TAG.
#define END(TAG) .size TAG,.-TAG

	.text
	.global __hexagon_divdf3
	.type __hexagon_divdf3,@function
	Q6_ALIAS(divdf3)
	FAST_ALIAS(divdf3)
	FAST2_ALIAS(divdf3)
	.p2align 5
__hexagon_divdf3:
	// Classify both operands and capture their high words.
	// NORMAL is written twice in this packet.  NOTE(review): this relies on
	// Hexagon combining same-packet writes to one predicate (logical AND),
	// so NORMAL ends up true only when A and B are both normal - confirm
	// against the architecture manual.
	{
		NORMAL = dfclass(A,#DFCLASS_NORMAL)
		NORMAL = dfclass(B,#DFCLASS_NORMAL)
		EXPBA = combine(BH,AH)
		SIGN = xor(AH,BH)
	}

	// From here on the operands are treated as remainder and denominator.
#undef A
#undef AH
#undef AL
#undef B
#undef BH
#undef BL
#define REM r1:0
#define REMHI r1
#define REMLO r0
#define DENOM r3:2
#define DENOMHI r3
#define DENOMLO r2

	// Leave the fast path unless both operands are normal.  In parallel,
	// pull the top SF_MANTBITS bits of the denominator mantissa into PROD
	// and load SFONE with 0x3f800001 (the bit pattern of 1.0f with bit 0
	// also set).
	{
		if (!NORMAL) jump .Ldiv_abnormal
		PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
		SFONE = ##0x3f800001
	}
	// SFDEN = SFONE with the denominator mantissa bits OR-ed in.
	// EXPB / EXPA are reduced to their DF_EXPBITS-wide exponent fields.
	// Q_POSITIVE is true when SIGN > -1, i.e. the XOR of the high words has
	// its top bit clear (operand signs agree).
	{
		SFDEN = or(SFONE,PRODLO)
		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
		Q_POSITIVE = cmp.gt(SIGN,#-1)
	}

	// r28 is no longer needed as SIGN; it now holds the constant 1.
#undef SIGN
#define ONE r28

	// NOTE(review): nothing in this part of the file branches to this label;
	// presumably the abnormal-operand path re-enters here - confirm.
.Ldenorm_continue:
	// Initial reciprocal approximation of SFDEN (the predicate result in
	// P_TMP is not consumed in the visible code).  SFERROR = SFONE with
	// bit 0 cleared, i.e. 0x3f800000.  EXPA becomes the exponent difference.
	{
		SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
		SFERROR = and(SFONE,#-2)
		ONE = #1
		EXPA = sub(EXPA,EXPB)
	}

	// EXPB is dead (already folded into EXPA); r13 now holds the integer
	// reciprocal estimate.
#undef EXPB
#define RECIPEST r13

	// Refinement step 1a: SFERROR = 1.0 - SFRECIP * SFDEN.
	// REMHI: the DF_EXPBITS+1 bits starting at bit DF_MANTBITS-32 (exponent
	// and sign fields) are replaced by the value 1.
	// RECIPEST is seeded with 0x00800000 << RECIPEST_SHIFT.
	{
		SFERROR -= sfmpy(SFRECIP,SFDEN):lib
		REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
		RECIPEST = ##0x00800000 << RECIPEST_SHIFT
	}
	// Refinement step 1b: SFRECIP += SFRECIP * SFERROR.
	// DENOMHI gets the same field replacement as REMHI above, and SFERROR is
	// reset to 0x3f800000 for the second refinement.
	{
		SFRECIP += sfmpy(SFRECIP,SFERROR):lib
		DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
		SFERROR = and(SFONE,#-2)
	}
	// Refinement step 2a.  QH / QL temporarily hold the exponent-difference
	// bounds -DF_BIAS+1 and DF_BIAS-1.
	{
		SFERROR -= sfmpy(SFRECIP,SFDEN):lib
		QH = #-DF_BIAS+1
		QL = #DF_BIAS-1
	}
	// Refinement step 2b.  NO_OVF_UNF is written twice in this packet
	// (EXPA > QH, and not EXPA > QL); see the NOTE(review) on NORMAL at the
	// entry packet for how the two writes are expected to combine.
	{
		SFRECIP += sfmpy(SFRECIP,SFERROR):lib
		NO_OVF_UNF = cmp.gt(EXPA,QH)
		NO_OVF_UNF = !cmp.gt(EXPA,QL)
	}
	// Place the low SF_MANTBITS bits of the refined reciprocal into RECIPEST
	// at bit RECIPEST_SHIFT, clear the quotient, and bias EXPA by -QADJ.
	{
		RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
		Q = #0
		EXPA = add(EXPA,#-QADJ)
	}

	// The single-precision temporaries are dead; r10 / r11 become scratch.
#undef SFERROR
#undef SFRECIP
#define TMP r10
#define TMP1 r11

	// Subtract 3 << RECIPEST_SHIFT from the estimate.
	// NOTE(review): presumably this keeps the estimate from overshooting the
	// true reciprocal - confirm.
	{
		RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
	}

// DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA): one quotient-refinement step.
//   packet 1: PROD = RECIPEST * REMHI (unsigned), REM <<= REMSHIFT
//   packet 2: PRODLO = 0, REM -= PRODHI * DENOMLO, REMSUB2 = PRODHI * DENOMHI
//   packet 3: Q += QSHIFTINSN(PROD, QSHIFT), REM -= REMSUB2 << 32, then EXTRA
// Taken together, packets 2 and 3 subtract PRODHI * DENOM from the shifted
// remainder.  Comments are kept outside the macro body because every body
// line must end in a backslash.
#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
	{ \
		PROD = mpyu(RECIPEST,REMHI); \
		REM = asl(REM,# ## ( REMSHIFT )); \
	}; \
	{ \
		PRODLO = # ## 0; \
		REM -= mpyu(PRODHI,DENOMLO); \
		REMSUB2 = mpyu(PRODHI,DENOMHI); \
	}; \
	{ \
		Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
		REM -= asl(REMSUB2, # ## 32); \
		EXTRA \
	}

	// Four steps; the remainder is shifted left by 15 each time while the
	// partial quotient is accumulated with shifts of <<14, >>1, >>16, >>31.
	// The final step also zeroes PROD for the correction below.
	DIV_ITER1B(ASL,14,15,)
	DIV_ITER1B(ASR,1,15,)
	DIV_ITER1B(ASR,16,15,)
	DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)

	// r15:14 is reused as a general temporary pair; r13 reverts to EXPB.
#undef REMSUB2
#define TMPPAIR r15:14
#define TMPPAIRHI r15
#define TMPPAIRLO r14

#undef RECIPEST
#define EXPB r13

	// Final correction: when DENOM is not greater than REM, one more
	// subtraction is possible, so PRODLO (zeroed above) becomes the quotient
	// increment 2.
	{
		// compare or sub with carry
		TMPPAIR = sub(REM,DENOM)
		P_TMP = cmp.gtu(DENOM,REM)
		// set up amt to add to q
		if (!P_TMP.new) PRODLO = #2
	}
	// Apply the increment and, in the same case, take the reduced remainder.
	// TMPPAIR is then cleared for the zero test that follows.
	{
		Q = add(Q,PROD)
		if (!P_TMP) REM = TMPPAIR
		TMPPAIR = #0
	}
	// A non-zero remainder sets bit 0 of the quotient.
	{
		P_TMP = cmp.eq(REM,TMPPAIR)
		if (!P_TMP.new) QL = or(QL,ONE)
	}
	// Negate the quotient when the operand signs differ.
	{
		PROD = neg(Q)
	}
	{
		if (!Q_POSITIVE) Q = PROD
	}

	// REM / DENOM are finished with; the pairs are renamed back to A / B.
#undef REM
#undef REMHI
#undef REMLO
#undef DENOM
#undef DENOMLO
#undef DENOMHI
	// NOTE(review): the operands of the directive below ("A r1:0") are at the
	// start of the next line of the file as received; that line is not part
	// of this edit, so the directive is left split exactly as it was.
#define
A r1:0 #define AH r1 #define AL r0 #define B r3:2 #define BH r3 #define BL r2 { A = convert_d2df(Q) if (!NO_OVF_UNF) jump .Ldiv_ovf_unf } { AH += asl(EXPA,#DF_MANTBITS-32) jumpr r31 } .Ldiv_ovf_unf: { AH += asl(EXPA,#DF_MANTBITS-32) EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) } { PROD = abs(Q) EXPA = add(EXPA,EXPB) } { P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow if (P_TMP.new) jump:nt .Ldiv_ovf } { P_TMP = cmp.gt(EXPA,#0) if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible... } // Underflow // We know what the infinite range exponent should be (EXPA) // Q is 2's complement, PROD is abs(Q) // Normalize Q, shift right, add a high bit, convert, change exponent #define FUDGE1 7 // how much to shift right #define FUDGE2 4 // how many guard/round to keep at lsbs { EXPB = add(clb(PROD),#-1) // doesn't need to be added in since EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent TMP = USR TMP1 = #63 } { EXPB = min(EXPA,TMP1) TMP1 = or(TMP,#0x030) PROD = asl(PROD,EXPB) EXPA = #0 } { TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out PROD = lsr(PROD,EXPB) // shift out bits B = #1 } { P_TMP = cmp.gtu(B,TMPPAIR) if (!P_TMP.new) PRODLO = or(BL,PRODLO) PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2) } { Q = neg(PROD) P_TMP = bitsclr(PRODLO,#(1<