//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Double Precision Add / Subtract
//
// Entry points
//   __hexagon_adddf3 : A = A + B
//   __hexagon_subdf3 : A = A - B  (flips the sign bit of B, then jumps to add)
//
// In:    A (r1:0) and B (r3:2) hold the two double-precision operands.
// Out:   A (r1:0) holds the result; every path returns through jumpr r31.
// Uses:  r2-r15, r28 and p0-p3 are written as scratch.
//        USR is read on the zero-result paths (rounding mode) and written on
//        the overflow path (inexact + overflow bits).
//
// Outline of the normal path
//   1. Order the operands so that A has the larger exponent.
//   2. Expand both mantissas (with the implicit one) into 64-bit fixed point.
//   3. Shift B right by the exponent difference, folding the bits that fall
//      off into a sticky bit.
//   4. Apply the signs, add, and let convert_d2df normalize and round.
//   5. Patch the exponent of the converted value.
// Operands that are not both normal go to .Ladd_abnormal.
//
// Several packets below write the same predicate register twice; the two
// conditions are combined with AND (see the original "and" comments in
// .Ladd_ovf and .Linf_add).

// Operand / result register pairs
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2

// Biased exponents of A and B
#define EXPA r4
#define EXPB r5
#define EXPB_A r5:4

// Small constants (shift limit, sticky bit, exponent range limits)
#define ZTMP r7:6
#define ZTMPH r7
#define ZTMPL r6

// Fixed-point mantissa of A, later the sum
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

// Fixed-point mantissa of B
#define BTMP r9:8
#define BTMPH r9
#define BTMPL r8

// Second candidate value (negated A, shifted-out bits, difference)
#define ATMP2 r11:10
#define ATMP2H r11
#define ATMP2L r10

// r15:14 doubles as the {width, offset} pair handed to extractu
#define EXPDIFF r15
#define EXTRACTOFF r14
#define EXTRACTAMT r15:14

#define TMP r28

#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
#define SR_BIT_INEXACT 5

// Bit offset of the 2-bit rounding-mode field inside USR (overridable)
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif

#define NORMAL p3
#define BIGB p2

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

    .text
    .global __hexagon_adddf3
    .global __hexagon_subdf3
    .type __hexagon_adddf3, @function
    .type __hexagon_subdf3, @function

Q6_ALIAS(adddf3)
FAST_ALIAS(adddf3)
FAST2_ALIAS(adddf3)
Q6_ALIAS(subdf3)
FAST_ALIAS(subdf3)
FAST2_ALIAS(subdf3)

    .p2align 5
__hexagon_adddf3:
    {
        EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
        EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
        ATMP = combine(##0x20000000,#0)           // implicit one, bit 61
    }
    {
        NORMAL = dfclass(A,#2)                    // both operands must be
        NORMAL = dfclass(B,#2)                    // normal numbers
        BTMP = ATMP
        BIGB = cmp.gtu(EXPB,EXPA)                 // Does B have the larger exponent?
    }
    {
        if (!NORMAL) jump .Ladd_abnormal          // If abnormal, go to special code
        if (BIGB) A = B                           // if B >> A, swap A and B
        if (BIGB) B = A                           // If B >> A, swap A and B
        if (BIGB) EXPB_A = combine(EXPA,EXPB)     // swap exponents
    }
    {
        ATMP = insert(A,#MANTBITS,#EXPBITS-2)     // Q1.62
        BTMP = insert(B,#MANTBITS,#EXPBITS-2)     // Q1.62
        EXPDIFF = sub(EXPA,EXPB)
        ZTMP = combine(#62,#1)                    // ZTMPH = shift limit, ZTMPL = sticky bit
    }
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
    // Shared tail, also entered from the normal+subnormal case with
    // ATMP/BTMP/EXPA/EXPDIFF/ZTMP prepared the same way as above.
.Ladd_continue:
    {
        EXPDIFF = min(EXPDIFF,ZTMPH)              // If exponent difference >= ~60,
                                                  // will collapse to sticky bit
        ATMP2 = neg(ATMP)
        A_POS = cmp.gt(AH,#-1)                    // sign bit of A clear?
        EXTRACTOFF = #0
    }
    {
        if (!A_POS) ATMP = ATMP2                  // A negative: use negated mantissa
        ATMP2 = extractu(BTMP,EXTRACTAMT)         // low EXPDIFF bits: about to be shifted out
        BTMP = ASR(BTMP,EXPDIFF)                  // align B to the exponent of A
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
#define ZERO r15:14
        ZERO = #0
    }
    {
        NO_STICKIES = cmp.eq(ATMP2,ZERO)
        if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)   // lost bits become a sticky bit
        EXPB = add(EXPA,#-BIAS-60)                // exponent adjustment applied after conversion
        B_POS = cmp.gt(BH,#-1)                    // sign bit of B clear?
    }
    {
        ATMP = add(ATMP,BTMP)                     // ADD!!!
        ATMP2 = sub(ATMP,BTMP)                    // Negate and ADD --> SUB!!!
        ZTMP = combine(#54,##2045)                // exponent range handled by the fast path
    }
    {
        p0 = cmp.gtu(EXPA,ZTMPH)                  // must be pretty high in case of large cancellation
        p0 = !cmp.gtu(EXPA,ZTMPL)
        if (!p0.new) jump:nt .Ladd_ovf_unf
        if (!B_POS) ATMP = ATMP2                  // if B neg, pick difference
    }
    {
        A = convert_d2df(ATMP)                    // Convert to Double Precision, taking care of flags, etc.  So nice!
        p0 = cmp.eq(ATMPH,#0)
        p0 = cmp.eq(ATMPL,#0)
        if (p0.new) jump:nt .Ladd_zero            // or maybe conversion handles zero case correctly?
    }
    {
        AH += asl(EXPB,#HI_MANTBITS)              // patch the exponent field
        jumpr r31
    }

    // A - B: flip the sign of B and reuse the add routine (entered through
    // its __qdsp_ alias, which Q6_ALIAS sets equal to __hexagon_adddf3).
    .falign
__hexagon_subdf3:
    {
        BH = togglebit(BH,#31)
        jump __qdsp_adddf3
    }
END(__hexagon_subdf3)


    .falign
.Ladd_zero:
    // True zero, full cancellation
    // +0 unless round towards negative infinity
    {
        TMP = USR
        A = #0
        BH = #1
    }
    {
        TMP = extractu(TMP,#2,#SR_ROUND_OFF)      // rounding mode
        BH = asl(BH,#31)                          // sign-bit mask
    }
    {
        p0 = cmp.eq(TMP,#2)                       // round towards negative infinity?
        if (p0.new) AH = xor(AH,BH)               // then the result is -0
        jumpr r31
    }
    .falign
.Ladd_ovf_unf:
    // Overflow or Denormal is possible
    // Good news: Underflow flag is not possible!

    // ATMP has 2's complement value
    //
    // EXPA has A's exponent, EXPB has EXPA-BIAS-60
    //
    // Convert, extract exponent, add adjustment.
    // If > 2046, overflow
    // If <= 0, denormal
    //
    // Note that we've not done our zero check yet, so do that too

    {
        A = convert_d2df(ATMP)
        p0 = cmp.eq(ATMPH,#0)
        p0 = cmp.eq(ATMPL,#0)
        if (p0.new) jump:nt .Ladd_zero
    }
    {
        TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)  // exponent produced by the conversion
        AH += asl(EXPB,#HI_MANTBITS)              // result, valid if the exponent is in range
    }
    {
        EXPB = add(EXPB,TMP)                      // adjusted exponent, possibly out of range
        B = combine(##0x00100000,#0)              // implicit one at bit 52
    }
    {
        p0 = cmp.gt(EXPB,##BIAS+BIAS-2)           // > 2046: overflow
        if (p0.new) jump:nt .Ladd_ovf
    }
    {
        p0 = cmp.gt(EXPB,#0)                      // > 0: A is already the answer
        if (p0.new) jumpr:t r31
        TMP = sub(#1,EXPB)                        // denormal: right-shift amount
    }
    {
        B = insert(A,#MANTBITS,#0)                // converted mantissa below the implicit one
        A = ATMP                                  // 2's complement sum supplies the sign bit
    }
    {
        B = lsr(B,TMP)
    }
    {
        A = insert(B,#63,#0)                      // replace the low 63 bits, leave sign
        jumpr r31
    }
    .falign
.Ladd_ovf:
    // We get either max finite value or infinity.  Either way, overflow+inexact
    {
        A = ATMP                                  // 2's complement value
        TMP = USR
        ATMP = combine(##0x7fefffff,#-1)          // positive max finite
    }
    {
        EXPB = extractu(TMP,#2,#SR_ROUND_OFF)     // rounding bits
        TMP = or(TMP,#0x28)                       // inexact + overflow
        BTMP = combine(##0x7ff00000,#0)           // positive infinity
    }
    {
        USR = TMP
        EXPB ^= lsr(AH,#31)                       // Does sign match rounding?
        TMP = EXPB                                // unmodified rounding mode
    }
    {
        p0 = !cmp.eq(TMP,#1)                      // If not round-to-zero and
        p0 = !cmp.eq(EXPB,#2)                     // Not rounding the other way,
        if (p0.new) ATMP = BTMP                   // we should get infinity
    }
    {
        A = insert(ATMP,#63,#0)                   // insert inf/maxfinite, leave sign
    }
    {
        // NOTE(review): p0 is not consumed after this compare; presumably it
        // is executed for its side effects -- confirm.
        p0 = dfcmp.eq(A,A)
        jumpr r31
    }

    // At least one operand is zero, subnormal, infinite or NaN.
.Ladd_abnormal:
    {
        ATMP = extractu(A,#63,#0)                 // strip off sign
        BTMP = extractu(B,#63,#0)                 // strip off sign
    }
    {
        p3 = cmp.gtu(ATMP,BTMP)
        if (!p3.new) A = B                        // sort values
        if (!p3.new) B = A                        // sort values
    }
    {
        // Any NaN --> NaN, possibly raise invalid if sNaN
        p0 = dfclass(A,#0x0f)                     // A not NaN?
        if (!p0.new) jump:nt .Linvalid_nan_add
        if (!p3) ATMP = BTMP                      // keep the magnitudes sorted
        if (!p3) BTMP = ATMP                      // the same way as A and B
    }
    {
        // Infinity + non-infinity number is infinity
        // Infinity + infinity --> inf or nan
        p1 = dfclass(A,#0x08)                     // A is infinity
        if (p1.new) jump:nt .Linf_add
    }
    {
        p2 = dfclass(B,#0x01)                     // B is zero
        if (p2.new) jump:nt .LB_zero              // so return A or special 0+0
        ATMP = #0                                 // .LB_zero compares A against this zero
    }
    // We are left with adding one or more subnormals
    {
        p0 = dfclass(A,#4)
        if (p0.new) jump:nt .Ladd_two_subnormal
        ATMP = combine(##0x20000000,#0)
    }
    // A is normal and B is subnormal: give B an exponent of 1 and no implicit
    // one, then rejoin the common path.
    {
        EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
        EXPB = #1
        // BTMP already ABS(B)
        BTMP = asl(BTMP,#EXPBITS-2)
    }
#undef ZERO
#define EXTRACTOFF r14
#define EXPDIFF r15
    {
        ATMP = insert(A,#MANTBITS,#EXPBITS-2)
        EXPDIFF = sub(EXPA,EXPB)
        ZTMP = combine(#62,#1)
        jump .Ladd_continue
    }

    // Both operands are subnormal: add the signed magnitudes as integers.
.Ladd_two_subnormal:
    {
        ATMP = extractu(A,#63,#0)
        BTMP = extractu(B,#63,#0)
    }
    {
        ATMP = neg(ATMP)
        BTMP = neg(BTMP)
        p0 = cmp.gt(AH,#-1)
        p1 = cmp.gt(BH,#-1)
    }
    {
        if (p0) ATMP = A                          // non-negative operands are used as is
        if (p1) BTMP = B
    }
    {
        ATMP = add(ATMP,BTMP)
    }
    {
        BTMP = neg(ATMP)
        p0 = cmp.gt(ATMPH,#-1)                    // sum non-negative?
        B = #0
    }
    {
        if (!p0) A = BTMP                         // magnitude of a negative sum
        if (p0) A = ATMP
        BH = ##0x80000000
    }
    {
        if (!p0) AH = or(AH,BH)                   // put the sign back
        p0 = dfcmp.eq(A,B)                        // B is -0 here: did the sum cancel to zero?
        if (p0.new) jump:nt .Lzero_plus_zero
    }
    {
        jumpr r31
    }

.Linvalid_nan_add:
    {
        TMP = convert_df2sf(A)                    // will generate invalid if sNaN
        p0 = dfclass(B,#0x0f)                     // if B is not NaN
        if (p0.new) B = A                         // make it whatever A is
    }
    {
        BL = convert_df2sf(B)                     // will generate invalid if sNaN
        A = #-1                                   // result: all-ones bit pattern
        jumpr r31
    }
    .falign
.LB_zero:
    {
        p0 = dfcmp.eq(ATMP,A)                     // is A also zero?
        if (!p0.new) jumpr:t r31                  // If not, just return A
    }
    // 0 + 0 is special
    // if equal integral values, they have the same sign, which is fine for all rounding
    // modes.
    // If unequal in sign, we get +0 for all rounding modes except round down
.Lzero_plus_zero:
    {
        p0 = cmp.eq(A,B)
        if (p0.new) jumpr:t r31
    }
    {
        TMP = USR
    }
    {
        TMP = extractu(TMP,#2,#SR_ROUND_OFF)
        A = #0
    }
    {
        p0 = cmp.eq(TMP,#2)
        if (p0.new) AH = ##0x80000000
        jumpr r31
    }
.Linf_add:
    // adding infinities is only OK if they are equal
    {
        p0 = !cmp.eq(AH,BH)                       // Do they have different signs
        p0 = dfclass(B,#8)                        // And is B also infinite?
        if (!p0.new) jumpr:t r31                  // If not, just a normal inf
    }
    {
        BL = ##0x7f800001                         // sNAN
    }
    {
        A = convert_sf2df(BL)                     // trigger invalid, set NaN
        jumpr r31
    }
END(__hexagon_adddf3)