//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Double Precision Add / Subtract
//
// Entry points:
//   __hexagon_adddf3   A + B
//   __hexagon_subdf3   A - B  (toggles bit 31 of BH, then jumps into the add)
//
// In:     A (r1:0), B (r3:2)
// Out:    A (r1:0); every path returns with "jumpr r31"
// Writes: r0-r15, r28 and p0-p3 on at least one path
// USR:    read for the rounding mode; written explicitly in .Ladd_ovf
//         (other flag updates are left to the convert_* instructions)
//
// Reading notes (Hexagon packet semantics, see the architecture manual):
//   * Instructions inside one { } packet read their sources before any
//     destination of that packet is written, so "A = B ; B = A" is a swap.
//   * Two compares writing the same predicate in one packet are ANDed.
//   * "pN.new" consumes the predicate produced inside the same packet.

// Operands / result
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2

// Biased exponent fields of A and B
#define EXPA r4
#define EXPB r5
#define EXPB_A r5:4

// Small constants (sticky bit / shift limit, later exponent range limits)
#define ZTMP r7:6
#define ZTMPH r7
#define ZTMPL r6

// 64-bit fixed-point working copies of the two mantissas
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

#define BTMP r9:8
#define BTMPH r9
#define BTMPL r8

#define ATMP2 r11:10
#define ATMP2H r11
#define ATMP2L r10

// r15:14 is used as the (width:offset) register pair of extractu;
// it is re-purposed as ZERO further down.
#define EXPDIFF r15
#define EXTRACTOFF r14
#define EXTRACTAMT r15:14

#define TMP r28

#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
#define SR_BIT_INEXACT 5

// Bit offset of the 2-bit rounding-mode field inside USR (overridable).
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif

#define NORMAL p3
#define BIGB p2

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

    .text
    .global __hexagon_adddf3
    .global __hexagon_subdf3
    .type __hexagon_adddf3, @function
    .type __hexagon_subdf3, @function

Q6_ALIAS(adddf3)
FAST_ALIAS(adddf3)
FAST2_ALIAS(adddf3)
Q6_ALIAS(subdf3)
FAST_ALIAS(subdf3)
FAST2_ALIAS(subdf3)

    .p2align 5
__hexagon_adddf3:
    {
        EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
        EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
        ATMP = combine(##0x20000000,#0)         // bit 61: slot for the implicit leading one
    }
    {
        NORMAL = dfclass(A,#2)                  // ANDed with the next line:
        NORMAL = dfclass(B,#2)                  // NORMAL = both operands are normal numbers
        BTMP = ATMP
        BIGB = cmp.gtu(EXPB,EXPA)               // Is B substantially greater than A?
    }
    {
        if (!NORMAL) jump .Ladd_abnormal        // If abnormal, go to special code
        if (BIGB) A = B                         // if B >> A, swap A and B
        if (BIGB) B = A                         // If B >> A, swap A and B
        if (BIGB) EXPB_A = combine(EXPA,EXPB)   // swap exponents
    }
    // From here on EXPA >= EXPB.
    {
        ATMP = insert(A,#MANTBITS,#EXPBITS-2)   // Q1.62
        BTMP = insert(B,#MANTBITS,#EXPBITS-2)   // Q1.62
        EXPDIFF = sub(EXPA,EXPB)
        ZTMP = combine(#62,#1)                  // ZTMPH = max shift, ZTMPL = sticky bit
    }
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
    // Common add path. Also entered from .Ladd_abnormal (normal + subnormal)
    // with ATMP, BTMP, EXPA, EXPDIFF and ZTMP prepared the same way.
.Ladd_continue:
    {
        EXPDIFF = min(EXPDIFF,ZTMPH)            // If exponent difference >= ~60,
                                                // will collapse to sticky bit
        ATMP2 = neg(ATMP)
        A_POS = cmp.gt(AH,#-1)                  // sign bit of A clear?
        EXTRACTOFF = #0
    }
    {
        if (!A_POS) ATMP = ATMP2                // A negative: use the negated mantissa
        ATMP2 = extractu(BTMP,EXTRACTAMT)       // low EXPDIFF bits of B: the bits shifted out below
        BTMP = ASR(BTMP,EXPDIFF)                // align B to A's exponent
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
#define ZERO r15:14
        ZERO = #0                               // r15:14 is read above before this write lands
    }
    {
        NO_STICKIES = cmp.eq(ATMP2,ZERO)
        if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)   // fold lost bits into a sticky LSB
        EXPB = add(EXPA,#-BIAS-60)              // exponent adjustment applied after convert_d2df
        B_POS = cmp.gt(BH,#-1)                  // sign bit of B clear?
    }
    {
        ATMP = add(ATMP,BTMP)                   // ADD!!!
        ATMP2 = sub(ATMP,BTMP)                  // Negate and ADD --> SUB!!!
        ZTMP = combine(#54,##2045)              // ZTMPH = 54, ZTMPL = 2045: safe EXPA range
    }
    {
        p0 = cmp.gtu(EXPA,ZTMPH)                // must be pretty high in case of large cancellation
        p0 = !cmp.gtu(EXPA,ZTMPL)               // ANDed: p0 = (54 < EXPA <= 2045)
        if (!p0.new) jump:nt .Ladd_ovf_unf
        if (!B_POS) ATMP = ATMP2                // if B neg, pick difference
    }
    {
        A = convert_d2df(ATMP)                  // Convert to Double Precision, taking care of flags, etc.  So nice!
        p0 = cmp.eq(ATMPH,#0)
        p0 = cmp.eq(ATMPL,#0)                   // ANDed: p0 = (ATMP == 0)
        if (p0.new) jump:nt .Ladd_zero          // or maybe conversion handles zero case correctly?
    }
    {
        AH += asl(EXPB,#HI_MANTBITS)            // add the exponent adjustment into the exponent field
        jumpr r31
    }

    // A - B: flip the sign bit of B and reuse the add (via its __qdsp_ alias).
    .falign
__hexagon_subdf3:
    {
        BH = togglebit(BH,#31)
        jump __qdsp_adddf3
    }

    .falign
.Ladd_zero:
    // True zero, full cancellation
    // +0 unless round towards negative infinity
    {
        TMP = USR
        A = #0
        BH = #1
    }
    {
        // Same rounding-mode field that .Ladd_ovf and .Lzero_plus_zero read;
        // use the named offset so an overridden SR_ROUND_OFF applies here too.
        TMP = extractu(TMP,#2,#SR_ROUND_OFF)
        BH = asl(BH,#31)                        // BH = sign-bit mask
    }
    {
        p0 = cmp.eq(TMP,#2)                     // rounding towards negative infinity?
        if (p0.new) AH = xor(AH,BH)             // then the result is -0
        jumpr r31
    }

    .falign
.Ladd_ovf_unf:
    // Overflow or Denormal is possible
    // Good news: Underflow flag is not possible!

    // ATMP has 2's complement value
    //
    // EXPA has A's exponent, EXPB has EXPA-BIAS-60
    //
    // Convert, extract exponent, add adjustment.
    // If > 2046, overflow
    // If <= 0, denormal
    //
    // Note that we've not done our zero check yet, so do that too

    {
        A = convert_d2df(ATMP)
        p0 = cmp.eq(ATMPH,#0)
        p0 = cmp.eq(ATMPL,#0)                   // ANDed: p0 = (ATMP == 0)
        if (p0.new) jump:nt .Ladd_zero
    }
    {
        TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)    // exponent produced by the conversion
        AH += asl(EXPB,#HI_MANTBITS)                // tentatively apply the adjustment
    }
    {
        EXPB = add(EXPB,TMP)                    // EXPB = final biased exponent (may be out of range)
        B = combine(##0x00100000,#0)            // implicit-one bit for the denormal path
    }
    {
        p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
        if (p0.new) jump:nt .Ladd_ovf
    }
    {
        p0 = cmp.gt(EXPB,#0)
        if (p0.new) jumpr:t r31                 // exponent in range: A is already the result
        TMP = sub(#1,EXPB)                      // otherwise: right-shift amount for the denormal
    }
    {
        B = insert(A,#MANTBITS,#0)              // mantissa of A below the implicit one
        A = ATMP                                // keeps the sign in bit 63
    }
    {
        B = lsr(B,TMP)
    }
    {
        A = insert(B,#63,#0)                    // replace everything except the sign bit
        jumpr r31
    }

    .falign
.Ladd_ovf:
    // We get either max finite value or infinity.  Either way, overflow+inexact
    {
        A = ATMP                                // 2's complement value
        TMP = USR
        ATMP = combine(##0x7fefffff,#-1)        // positive max finite
    }
    {
        EXPB = extractu(TMP,#2,#SR_ROUND_OFF)   // rounding bits
        TMP = or(TMP,#0x28)                     // inexact + overflow
        BTMP = combine(##0x7ff00000,#0)         // positive infinity
    }
    {
        USR = TMP
        EXPB ^= lsr(AH,#31)                     // Does sign match rounding?
        TMP = EXPB                              // unmodified rounding mode
    }
    {
        p0 = !cmp.eq(TMP,#1)                    // If not round-to-zero and
        p0 = !cmp.eq(EXPB,#2)                   // Not rounding the other way,
        if (p0.new) ATMP = BTMP                 // we should get infinity
    }
    {
        A = insert(ATMP,#63,#0)                 // insert inf/maxfinite, leave sign
    }
    {
        // NOTE(review): p0 is not consumed before the return; the purpose of
        // this compare is not evident from this file.
        p0 = dfcmp.eq(A,A)
        jumpr r31
    }

    // At least one operand is not a normal number (zero, subnormal, inf, NaN).
.Ladd_abnormal:
    {
        ATMP = extractu(A,#63,#0)               // strip off sign
        BTMP = extractu(B,#63,#0)               // strip off sign
    }
    {
        p3 = cmp.gtu(ATMP,BTMP)
        if (!p3.new) A = B                      // sort values
        if (!p3.new) B = A                      // sort values
    }
    {
        // Any NaN --> NaN, possibly raise invalid if sNaN
        p0 = dfclass(A,#0x0f)                   // A not NaN?
        if (!p0.new) jump:nt .Linvalid_nan_add
        if (!p3) ATMP = BTMP                    // keep the stripped copies sorted as well
        if (!p3) BTMP = ATMP
    }
    {
        // Infinity + non-infinity number is infinity
        // Infinity + infinity --> inf or nan
        p1 = dfclass(A,#0x08)                   // A is infinity
        if (p1.new) jump:nt .Linf_add
    }
    {
        p2 = dfclass(B,#0x01)                   // B is zero
        if (p2.new) jump:nt .LB_zero            // so return A or special 0+0
        ATMP = #0                               // .LB_zero compares A against this zero
    }
    // We are left with adding one or more subnormals
    {
        p0 = dfclass(A,#4)
        if (p0.new) jump:nt .Ladd_two_subnormal
        ATMP = combine(##0x20000000,#0)
    }
    // Here the branch above was not taken: rebuild the state .Ladd_continue expects.
    {
        EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
        EXPB = #1
        // BTMP already ABS(B)
        BTMP = asl(BTMP,#EXPBITS-2)
    }
#undef ZERO
#define EXTRACTOFF r14
#define EXPDIFF r15
    {
        ATMP = insert(A,#MANTBITS,#EXPBITS-2)
        EXPDIFF = sub(EXPA,EXPB)
        ZTMP = combine(#62,#1)
        jump .Ladd_continue
    }

    // Both operands subnormal: add them as signed 64-bit integers.
.Ladd_two_subnormal:
    {
        ATMP = extractu(A,#63,#0)
        BTMP = extractu(B,#63,#0)
    }
    {
        ATMP = neg(ATMP)
        BTMP = neg(BTMP)
        p0 = cmp.gt(AH,#-1)                     // A non-negative?
        p1 = cmp.gt(BH,#-1)                     // B non-negative?
    }
    {
        if (p0) ATMP = A                        // non-negative operands are used as-is
        if (p1) BTMP = B
    }
    {
        ATMP = add(ATMP,BTMP)
    }
    {
        BTMP = neg(ATMP)
        p0 = cmp.gt(ATMPH,#-1)                  // sum non-negative?
        B = #0
    }
    {
        if (!p0) A = BTMP                       // A = magnitude of the sum
        if (p0) A = ATMP
        BH = ##0x80000000                       // B now has only the sign bit set
    }
    {
        if (!p0) AH = or(AH,BH)                 // negative sum: set the sign bit
        p0 = dfcmp.eq(A,B)                      // compares A as it was before the OR above
        if (p0.new) jump:nt .Lzero_plus_zero
    }
    {
        jumpr r31
    }

.Linvalid_nan_add:
    {
        TMP = convert_df2sf(A)                  // will generate invalid if sNaN
        p0 = dfclass(B,#0x0f)                   // if B is not NaN
        if (p0.new) B = A                       // make it whatever A is
    }
    {
        BL = convert_df2sf(B)                   // will generate invalid if sNaN
        A = #-1                                 // result: all bits set
        jumpr r31
    }

    .falign
.LB_zero:
    {
        p0 = dfcmp.eq(ATMP,A)                   // is A also zero?
        if (!p0.new) jumpr:t r31                // If not, just return A
    }
    // 0 + 0 is special
    // if equal integral values, they have the same sign, which is fine for all rounding
    // modes.
    // If unequal in sign, we get +0 for all rounding modes except round down
.Lzero_plus_zero:
    {
        p0 = cmp.eq(A,B)
        if (p0.new) jumpr:t r31
    }
    {
        TMP = USR
    }
    {
        TMP = extractu(TMP,#2,#SR_ROUND_OFF)
        A = #0
    }
    {
        p0 = cmp.eq(TMP,#2)                     // round down?
        if (p0.new) AH = ##0x80000000           // then the result is -0
        jumpr r31
    }

.Linf_add:
    // adding infinities is only OK if they are equal
    {
        p0 = !cmp.eq(AH,BH)                     // Do they have different signs
        p0 = dfclass(B,#8)                      // And is B also infinite?
        if (!p0.new) jumpr:t r31                // If not, just a normal inf
    }
    {
        BL = ##0x7f800001                       // sNAN
    }
    {
        A = convert_sf2df(BL)                   // trigger invalid, set NaN
        jumpr r31
    }
END(__hexagon_adddf3)