//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Double Precision Divide
//
// Entry:   __hexagon_divdf3
// Aliases: __qdsp_divdf3, __hexagon_fast_divdf3, __hexagon_fast2_divdf3
// In:      r1:0 = A (numerator), r3:2 = B (denominator), both IEEE doubles
// Out:     r1:0 = A / B
// Uses:    r0-r15, r28, p0-p3; reads/writes USR on the overflow, underflow
//          and divide-by-zero paths. Returns with "jumpr r31".
//
// Outline of the normal path:
//   1. Classify the operands; anything that is not normal/normal goes to
//      .Ldiv_abnormal, which either produces the special-case result or
//      normalizes denormals and re-enters at .Ldenorm_continue.
//   2. Build a single-precision copy of the denominator mantissa, take
//      sfrecipa of it and refine the reciprocal twice (e = 1 - r*d; r += r*e).
//   3. Four DIV_ITER1B steps accumulate quotient bits in Q while updating the
//      running remainder REM.
//   4. Final remainder fix-up, sticky bit, sign, convert_d2df, and exponent
//      adjustment. Results whose exponent falls outside the normal range go
//      through .Ldiv_ovf_unf.
//
// Register names are re-#defined as the routine progresses (A/B become
// REM/DENOM and back again, SIGN becomes ONE, EXPB becomes RECIPEST, ...);
// the #undef/#define lines below mark every such hand-over.
//
// NOTE: where two instructions in one packet write the same predicate, the
// code relies on the results being ANDed together (Hexagon packet semantics).

#define A r1:0
#define AH r1
#define AL r0

#define B r3:2
#define BH r3
#define BL r2

#define Q r5:4
#define QH r5
#define QL r4

#define PROD r7:6
#define PRODHI r7
#define PRODLO r6

#define SFONE r8
#define SFDEN r9
#define SFERROR r10
#define SFRECIP r11

#define EXPBA r13:12
#define EXPB r13
#define EXPA r12

#define REMSUB2 r15:14



#define SIGN r28

#define Q_POSITIVE p3
#define NORMAL p2
#define NO_OVF_UNF p1
#define P_TMP p0

#define RECIPEST_SHIFT 3
#define QADJ 61

#define DFCLASS_NORMAL 0x02
#define DFCLASS_NUMBER 0x0F
#define DFCLASS_INFINITE 0x08
#define DFCLASS_ZERO 0x01
#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)

#define DF_MANTBITS 52
#define DF_EXPBITS 11
#define SF_MANTBITS 23
#define SF_EXPBITS 8
#define DF_BIAS 0x3ff

#define SR_ROUND_OFF 22

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

	.text
	.global __hexagon_divdf3
	.type __hexagon_divdf3,@function
	Q6_ALIAS(divdf3)
	FAST_ALIAS(divdf3)
	FAST2_ALIAS(divdf3)
	.p2align 5
__hexagon_divdf3:
	// NORMAL = A is normal AND B is normal.
	// EXPBA keeps both high words (sign/exponent) for later extraction;
	// bit 31 of SIGN is the XOR of the two sign bits.
	{
		NORMAL = dfclass(A,#DFCLASS_NORMAL)
		NORMAL = dfclass(B,#DFCLASS_NORMAL)
		EXPBA = combine(BH,AH)
		SIGN = xor(AH,BH)
	}
#undef A
#undef AH
#undef AL
#undef B
#undef BH
#undef BL
#define REM r1:0
#define REMHI r1
#define REMLO r0
#define DENOM r3:2
#define DENOMHI r3
#define DENOMLO r2
	// PROD = top SF_MANTBITS bits of the denominator mantissa.
	// SFONE = single-precision 1.0 with its LSB set. The whole packet
	// executes even when the jump is taken, so SFONE is also valid in
	// .Ldiv_abnormal.
	{
		if (!NORMAL) jump .Ldiv_abnormal
		PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
		SFONE = ##0x3f800001
	}
	// SFDEN = single-precision value in [1,2) carrying the denominator
	// mantissa bits. Reduce EXPA/EXPB to the bare biased exponents.
	// Q_POSITIVE = result sign bit is clear.
	{
		SFDEN = or(SFONE,PRODLO)
		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
		Q_POSITIVE = cmp.gt(SIGN,#-1)
	}
#undef SIGN
#define ONE r28
.Ldenorm_continue:
	// Re-entry point for the denormal path: expects SFDEN, SFONE, EXPA,
	// EXPB, Q_POSITIVE and the REM/DENOM mantissas to be set up.
	// SFERROR = SFONE with the LSB cleared, i.e. exactly 1.0f.
	// EXPA becomes the (unbiased) exponent difference.
	{
		SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
		SFERROR = and(SFONE,#-2)
		ONE = #1
		EXPA = sub(EXPA,EXPB)
	}
#undef EXPB
#define RECIPEST r13
	// First refinement: SFERROR = 1.0 - SFRECIP*SFDEN.
	// REMHI: overwrite sign+exponent (top DF_EXPBITS+1 bits) with 0x001,
	// which leaves the mantissa with its hidden bit made explicit.
	{
		SFERROR -= sfmpy(SFRECIP,SFDEN):lib
		REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
		RECIPEST = ##0x00800000 << RECIPEST_SHIFT
	}
	// SFRECIP += SFRECIP*SFERROR; same hidden-bit treatment for DENOMHI;
	// reload 1.0f for the second refinement.
	{
		SFRECIP += sfmpy(SFRECIP,SFERROR):lib
		DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
		SFERROR = and(SFONE,#-2)
	}
	// Second refinement error term; QH/QL temporarily hold the exponent
	// limits used for the range check in the next packet.
	{
		SFERROR -= sfmpy(SFRECIP,SFDEN):lib
		QH = #-DF_BIAS+1
		QL = #DF_BIAS-1
	}
	// NO_OVF_UNF = (EXPA > -DF_BIAS+1) AND NOT (EXPA > DF_BIAS-1).
	{
		SFRECIP += sfmpy(SFRECIP,SFERROR):lib
		NO_OVF_UNF = cmp.gt(EXPA,QH)
		NO_OVF_UNF = !cmp.gt(EXPA,QL)
	}
	// Move the refined reciprocal mantissa into the integer estimate
	// (hidden bit was preloaded above), clear the quotient accumulator,
	// and pre-adjust the exponent by QADJ.
	{
		RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
		Q = #0
		EXPA = add(EXPA,#-QADJ)
	}
#undef SFERROR
#undef SFRECIP
#define TMP r10
#define TMP1 r11
	// Lower the estimate by 3 units at the shifted scale.
	// NOTE(review): presumably so the estimate never overshoots the true
	// reciprocal -- confirm against the algorithm's error analysis.
	{
		RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
	}

// One quotient-refinement step (three packets):
//   PROD    = RECIPEST * REMHI, then PRODLO is cleared so only PRODHI counts
//   REM     = (REM << REMSHIFT) - PRODHI*DENOMLO - ((PRODHI*DENOMHI) << 32)
//   Q      += QSHIFTINSN(PROD, QSHIFT)
// EXTRA is an optional instruction appended to the last packet.
// (No comments inside the macro body: it relies on backslash continuations.)
#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
	{ \
		PROD = mpyu(RECIPEST,REMHI); \
		REM = asl(REM,# ## ( REMSHIFT )); \
	}; \
	{ \
		PRODLO = # ## 0; \
		REM -= mpyu(PRODHI,DENOMLO); \
		REMSUB2 = mpyu(PRODHI,DENOMHI); \
	}; \
	{ \
		Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
		REM -= asl(REMSUB2, # ## 32); \
		EXTRA \
	}


	// The last step also zeroes PROD so it can serve as the final
	// quotient correction amount below.
	DIV_ITER1B(ASL,14,15,)
	DIV_ITER1B(ASR,1,15,)
	DIV_ITER1B(ASR,16,15,)
	DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)

#undef REMSUB2
#define TMPPAIR r15:14
#define TMPPAIRHI r15
#define TMPPAIRLO r14
#undef RECIPEST
#define EXPB r13
	{
		// compare or sub with carry
		TMPPAIR = sub(REM,DENOM)
		P_TMP = cmp.gtu(DENOM,REM)
		// set up amt to add to q
		if (!P_TMP.new) PRODLO = #2
	}
	// If REM >= DENOM: Q += 2 and REM -= DENOM; otherwise Q += 0.
	{
		Q = add(Q,PROD)
		if (!P_TMP) REM = TMPPAIR
		TMPPAIR = #0
	}
	// Non-zero remainder: OR a sticky bit into the quotient LSB.
	{
		P_TMP = cmp.eq(REM,TMPPAIR)
		if (!P_TMP.new) QL = or(QL,ONE)
	}
	// Apply the result sign to the integer quotient.
	{
		PROD = neg(Q)
	}
	{
		if (!Q_POSITIVE) Q = PROD
	}
#undef REM
#undef REMHI
#undef REMLO
#undef DENOM
#undef DENOMLO
#undef DENOMHI
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
	// Convert the signed 64-bit quotient to double; out-of-range
	// exponents take the slow path.
	{
		A = convert_d2df(Q)
		if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
	}
	// Fast exit: add the exponent difference into the exponent field.
	{
		AH += asl(EXPA,#DF_MANTBITS-32)
		jumpr r31
	}

.Ldiv_ovf_unf:
	// EXPB = exponent field produced by convert_d2df, read before this
	// packet's AH update takes effect (packet reads see old values).
	{
		AH += asl(EXPA,#DF_MANTBITS-32)
		EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
	}
	// EXPA = biased exponent the result would have with unlimited range.
	{
		PROD = abs(Q)
		EXPA = add(EXPA,EXPB)
	}
	{
		P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS)		// overflow
		if (P_TMP.new) jump:nt .Ldiv_ovf
	}
	{
		P_TMP = cmp.gt(EXPA,#0)
		if (P_TMP.new) jump:nt .Lpossible_unf		// round up to normal possible...
	}
	// Underflow
	// We know what the infinite range exponent should be (EXPA)
	// Q is 2's complement, PROD is abs(Q)
	// Normalize Q, shift right, add a high bit, convert, change exponent

#define FUDGE1 7	// how much to shift right
#define FUDGE2 4	// how many guard/round to keep at lsbs

	{
		EXPB = add(clb(PROD),#-1)	// doesn't need to be added in since
		EXPA = sub(#FUDGE1,EXPA)	// we extract post-converted exponent
		TMP = USR
		TMP1 = #63
	}
	// Clamp the right-shift amount to 63; TMP1 = USR with the 0x30 flag
	// bits set (Unf+Inexact, per the comments in .Lpossible_unf).
	// EXPA = 0 makes EXPBA a {width=EXPB, offset=0} pair for extractu.
	{
		EXPB = min(EXPA,TMP1)
		TMP1 = or(TMP,#0x030)
		PROD = asl(PROD,EXPB)
		EXPA = #0
	}
	{
		TMPPAIR = extractu(PROD,EXPBA)	// bits that will get shifted out
		PROD = lsr(PROD,EXPB)		// shift out bits
		B = #1
	}
	// If any shifted-out bit was set, fold it in as a sticky bit.
	// Then plant the "high bit" above the kept guard/round bits.
	{
		P_TMP = cmp.gtu(B,TMPPAIR)
		if (!P_TMP.new) PRODLO = or(BL,PRODLO)
		PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
	}
	// Low FUDGE2 bits non-zero => result is inexact: use the flagged USR.
	{
		Q = neg(PROD)
		P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
		if (!P_TMP.new) TMP = TMP1
	}
	{
		USR = TMP
		if (Q_POSITIVE) Q = PROD
		TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
	}
	{
		A = convert_d2df(Q)
	}
	// Remove the exponent contributed by the conversion and planted bit.
	{
		AH += asl(TMP,#DF_MANTBITS-32)
		jumpr r31
	}


.Lpossible_unf:
	// If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal
	// The answer is correct, but we need to raise Underflow
	{
		B = extractu(A,#63,#0)
		TMPPAIR = combine(##0x00100000,#0)		// min normal
		TMP = #0x7FFF
	}
	{
		P_TMP = dfcmp.eq(TMPPAIR,B)		// Is everything zero in the rounded value...
		P_TMP = bitsset(PRODHI,TMP)		// but a bunch of bits set in the unrounded abs(quotient)?
	}

#if (__HEXAGON_ARCH__ == 60)
	// Same two operations as the #else packet, issued unpacketized.
		TMP = USR		// Read USR for the flag update below
		if (!P_TMP) jumpr r31   // If not, just return
					// Else, we want to set Unf+Inexact
					// Note that inexact is already set...
#else
	{
		if (!P_TMP) jumpr r31			// If not, just return
		TMP = USR				// Else, we want to set Unf+Inexact
	}						// Note that inexact is already set...
#endif
	{
		TMP = or(TMP,#0x30)
	}
	{
		USR = TMP
	}
	// NOTE(review): the compare result is unused; presumably issued only
	// so pending floating-point exceptions are taken (cf. "get exceptions"
	// in .Ldiv_ovf) -- confirm.
	{
		p0 = dfcmp.eq(A,A)
		jumpr r31
	}

.Ldiv_ovf:

	// Raise Overflow, and choose the correct overflow value (saturated normal or infinity)

	// B = largest finite double; AH = 0 or all-ones according to sign.
	{
		TMP = USR
		B = combine(##0x7fefffff,#-1)
		AH = mux(Q_POSITIVE,#0,#-1)
	}
	// PROD = +infinity; QH = 2-bit rounding mode field from USR.
	// NOTE(review): 0x28 is presumably Overflow|Inexact -- confirm
	// against the USR bit layout.
	{
		PROD = combine(##0x7ff00000,#0)
		QH = extractu(TMP,#2,#SR_ROUND_OFF)
		TMP = or(TMP,#0x28)
	}
	// Fold the result sign into the rounding mode so that the directed
	// modes can be tested uniformly below; QL keeps the unmodified mode.
	{
		USR = TMP
		QH ^= lsr(AH,#31)
		QL = QH
	}
	{
		p0 = !cmp.eq(QL,#1)		// if not round-to-zero
		p0 = !cmp.eq(QH,#2)		// and not rounding the other way
		if (p0.new) B = PROD		// go to inf
		p0 = dfcmp.eq(B,B)		// get exceptions
	}
	// Copy the low 63 bits of B into A, keeping A's sign bit.
	{
		A = insert(B,#63,#0)
		jumpr r31
	}

#undef ONE
#define SIGN r28
#undef NORMAL
#undef NO_OVF_UNF
#define P_INF p1
#define P_ZERO p2
.Ldiv_abnormal:
	// At least one operand is not a normal number.
	// P_TMP = both operands are numbers (neither is NaN).
	{
		P_TMP = dfclass(A,#DFCLASS_NUMBER)
		P_TMP = dfclass(B,#DFCLASS_NUMBER)
		Q_POSITIVE = cmp.gt(SIGN,#-1)
	}
	// P_INF = inf / inf
	{
		P_INF = dfclass(A,#DFCLASS_INFINITE)
		P_INF = dfclass(B,#DFCLASS_INFINITE)
	}
	// P_ZERO = 0 / 0
	{
		P_ZERO = dfclass(A,#DFCLASS_ZERO)
		P_ZERO = dfclass(B,#DFCLASS_ZERO)
	}
	{
		if (!P_TMP) jump .Ldiv_nan
		if (P_INF) jump .Ldiv_invalid
	}
	{
		if (P_ZERO) jump .Ldiv_invalid
	}
	// P_ZERO is reused: false when A is zero or B is infinite,
	// i.e. when the quotient is a (signed) zero.
	{
		P_ZERO = dfclass(A,#DFCLASS_NONZERO)		// nonzero
		P_ZERO = dfclass(B,#DFCLASS_NONINFINITE)	// non-infinite
	}
	// P_INF is reused: false when A is infinite or B is zero,
	// i.e. when the quotient is a (signed) infinity.
	{
		P_INF = dfclass(A,#DFCLASS_NONINFINITE)		// non-infinite
		P_INF = dfclass(B,#DFCLASS_NONZERO)		// nonzero
	}
	{
		if (!P_ZERO) jump .Ldiv_zero_result
		if (!P_INF) jump .Ldiv_inf_result
	}
	// Now we've narrowed it down to (de)normal / (de)normal
	// Set up A/EXPA B/EXPB and go back
#undef P_ZERO
#undef P_INF
#define P_TMP2 p1
	{
		P_TMP = dfclass(A,#DFCLASS_NORMAL)
		P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
		TMP = ##0x00100000
	}
	{
		EXPBA = combine(BH,AH)
		AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)		// clear out hidden bit, sign bit
		BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32)		// clear out hidden bit, sign bit
	}
	{
		if (P_TMP) AH = or(AH,TMP)				// if normal, add back in hidden bit
		if (P_TMP2) BH = or(BH,TMP)				// if normal, add back in hidden bit
	}
	// QH/QL = left shift that brings each mantissa's leading one up to
	// the hidden-bit position (0 for an already-normal operand).
	{
		QH = add(clb(A),#-DF_EXPBITS)
		QL = add(clb(B),#-DF_EXPBITS)
		TMP = #1
	}
	{
		EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
		EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
	}
	// Normalize; a denormal operand gets effective exponent 1 - shift.
	{
		A = asl(A,QH)
		B = asl(B,QL)
		if (!P_TMP) EXPA = sub(TMP,QH)
		if (!P_TMP2) EXPB = sub(TMP,QL)
	}								// recreate values needed by resume code
	{
		PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
	}
	{
		SFDEN = or(SFONE,PRODLO)
		jump .Ldenorm_continue
	}

.Ldiv_zero_result:
	// Return zero carrying the XOR of the operand signs.
	{
		AH = xor(AH,BH)
		B = #0
	}
	{
		A = insert(B,#63,#0)
		jumpr r31
	}
.Ldiv_inf_result:
	// p2 = B is zero AND A is not infinite: a true divide-by-zero.
	{
		p2 = dfclass(B,#DFCLASS_ZERO)
		p2 = dfclass(A,#DFCLASS_NONINFINITE)
	}
	{
		TMP = USR
		if (!p2) jump 1f
		AH = xor(AH,BH)
	}
	{
		TMP = or(TMP,#0x04)		// DBZ
	}
	{
		USR = TMP
	}
1:
	// Return infinity carrying the XOR of the operand signs.
	{
		B = combine(##0x7ff00000,#0)
		p0 = dfcmp.uo(B,B)		// take possible exception
	}
	{
		A = insert(B,#63,#0)
		jumpr r31
	}
.Ldiv_nan:
	// At least one operand is a NaN (class mask 0x10): make both A and B
	// hold a NaN operand so both conversions below see one.
	{
		p0 = dfclass(A,#0x10)
		p1 = dfclass(B,#0x10)
		if (!p0.new) A = B
		if (!p1.new) B = A
	}
	{
		QH = convert_df2sf(A)	// get possible invalid exceptions
		QL = convert_df2sf(B)
	}
	// Return the all-ones bit pattern, which is a NaN.
	{
		A = #-1
		jumpr r31
	}

.Ldiv_invalid:
	// inf/inf or 0/0: convert a single-precision NaN pattern so that the
	// conversion itself raises Invalid and produces the double qNaN.
	{
		TMP = ##0x7f800001
	}
	{
		A = convert_sf2df(TMP)		// get invalid, get DF qNaN
		jumpr r31
	}
END(__hexagon_divdf3)