dfsqrt.S revision 336817
1//===----------------------Hexagon builtin routine ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is dual licensed under the MIT and the University of Illinois Open 6// Source Licenses. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9 10/* Double Precision square root */ 11 12#define EXP r28 13 14#define A r1:0 15#define AH r1 16#define AL r0 17 18#define SFSH r3:2 19#define SF_S r3 20#define SF_H r2 21 22#define SFHALF_SONE r5:4 23#define S_ONE r4 24#define SFHALF r5 25#define SF_D r6 26#define SF_E r7 27#define RECIPEST r8 28#define SFRAD r9 29 30#define FRACRAD r11:10 31#define FRACRADH r11 32#define FRACRADL r10 33 34#define ROOT r13:12 35#define ROOTHI r13 36#define ROOTLO r12 37 38#define PROD r15:14 39#define PRODHI r15 40#define PRODLO r14 41 42#define P_TMP p0 43#define P_EXP1 p1 44#define NORMAL p2 45 46#define SF_EXPBITS 8 47#define SF_MANTBITS 23 48 49#define DF_EXPBITS 11 50#define DF_MANTBITS 52 51 52#define DF_BIAS 0x3ff 53 54#define DFCLASS_ZERO 0x01 55#define DFCLASS_NORMAL 0x02 56#define DFCLASS_DENORMAL 0x02 57#define DFCLASS_INFINITE 0x08 58#define DFCLASS_NAN 0x10 59 60#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG; .type __qdsp_##TAG,@function 61#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG; .type __hexagon_fast_##TAG,@function 62#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG; .type __hexagon_fast2_##TAG,@function 63#define END(TAG) .size TAG,.-TAG 64 65 .text 66 .global __hexagon_sqrtdf2 67 .type __hexagon_sqrtdf2,@function 68 .global __hexagon_sqrt 69 .type __hexagon_sqrt,@function 70 Q6_ALIAS(sqrtdf2) 71 Q6_ALIAS(sqrt) 72 FAST_ALIAS(sqrtdf2) 73 FAST_ALIAS(sqrt) 74 FAST2_ALIAS(sqrtdf2) 75 FAST2_ALIAS(sqrt) 76 .type sqrt,@function 77 .p2align 5 78__hexagon_sqrtdf2: 79__hexagon_sqrt: 80 { 81 PROD = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) 82 EXP = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32) 83 SFHALF_SONE = combine(##0x3f000004,#1) 84 } 85 { 86 NORMAL = dfclass(A,#DFCLASS_NORMAL) // Is it normal 87 NORMAL = cmp.gt(AH,#-1) // and positive? 88 if (!NORMAL.new) jump:nt .Lsqrt_abnormal 89 SFRAD = or(SFHALF,PRODLO) 90 } 91#undef NORMAL 92.Ldenormal_restart: 93 { 94 FRACRAD = A 95 SF_E,P_TMP = sfinvsqrta(SFRAD) 96 SFHALF = and(SFHALF,#-16) 97 SFSH = #0 98 } 99#undef A 100#undef AH 101#undef AL 102#define ERROR r1:0 103#define ERRORHI r1 104#define ERRORLO r0 105 // SF_E : reciprocal square root 106 // SF_H : half rsqrt 107 // sf_S : square root 108 // SF_D : error term 109 // SFHALF: 0.5 110 { 111 SF_S += sfmpy(SF_E,SFRAD):lib // s0: root 112 SF_H += sfmpy(SF_E,SFHALF):lib // h0: 0.5*y0. Could also decrement exponent... 113 SF_D = SFHALF 114#undef SFRAD 115#define SHIFTAMT r9 116 SHIFTAMT = and(EXP,#1) 117 } 118 { 119 SF_D -= sfmpy(SF_S,SF_H):lib // d0: 0.5-H*S = 0.5-0.5*~1 120 FRACRADH = insert(S_ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // replace upper bits with hidden 121 P_EXP1 = cmp.gtu(SHIFTAMT,#0) 122 } 123 { 124 SF_S += sfmpy(SF_S,SF_D):lib // s1: refine sqrt 125 SF_H += sfmpy(SF_H,SF_D):lib // h1: refine half-recip 126 SF_D = SFHALF 127 SHIFTAMT = mux(P_EXP1,#8,#9) 128 } 129 { 130 SF_D -= sfmpy(SF_S,SF_H):lib // d1: error term 131 FRACRAD = asl(FRACRAD,SHIFTAMT) // Move fracrad bits to right place 132 SHIFTAMT = mux(P_EXP1,#3,#2) 133 } 134 { 135 SF_H += sfmpy(SF_H,SF_D):lib // d2: rsqrt 136 // cool trick: half of 1/sqrt(x) has same mantissa as 1/sqrt(x). 137 PROD = asl(FRACRAD,SHIFTAMT) // fracrad<<(2+exp1) 138 } 139 { 140 SF_H = and(SF_H,##0x007fffff) 141 } 142 { 143 SF_H = add(SF_H,##0x00800000 - 3) 144 SHIFTAMT = mux(P_EXP1,#7,#8) 145 } 146 { 147 RECIPEST = asl(SF_H,SHIFTAMT) 148 SHIFTAMT = mux(P_EXP1,#15-(1+1),#15-(1+0)) 149 } 150 { 151 ROOT = mpyu(RECIPEST,PRODHI) // root = mpyu_full(recipest,hi(fracrad<<(2+exp1))) 152 } 153 154#undef SFSH // r3:2 155#undef SF_H // r2 156#undef SF_S // r3 157#undef S_ONE // r4 158#undef SFHALF // r5 159#undef SFHALF_SONE // r5:4 160#undef SF_D // r6 161#undef SF_E // r7 162 163#define HL r3:2 164#define LL r5:4 165#define HH r7:6 166 167#undef P_EXP1 168#define P_CARRY0 p1 169#define P_CARRY1 p2 170#define P_CARRY2 p3 171 172 /* Iteration 0 */ 173 /* Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply */ 174 /* We can shift and subtract instead of shift and add? */ 175 { 176 ERROR = asl(FRACRAD,#15) 177 PROD = mpyu(ROOTHI,ROOTHI) 178 P_CARRY0 = cmp.eq(r0,r0) 179 } 180 { 181 ERROR -= asl(PROD,#15) 182 PROD = mpyu(ROOTHI,ROOTLO) 183 P_CARRY1 = cmp.eq(r0,r0) 184 } 185 { 186 ERROR -= lsr(PROD,#16) 187 P_CARRY2 = cmp.eq(r0,r0) 188 } 189 { 190 ERROR = mpyu(ERRORHI,RECIPEST) 191 } 192 { 193 ROOT += lsr(ERROR,SHIFTAMT) 194 SHIFTAMT = add(SHIFTAMT,#16) 195 ERROR = asl(FRACRAD,#31) // for next iter 196 } 197 /* Iteration 1 */ 198 { 199 PROD = mpyu(ROOTHI,ROOTHI) 200 ERROR -= mpyu(ROOTHI,ROOTLO) // amount is 31, no shift needed 201 } 202 { 203 ERROR -= asl(PROD,#31) 204 PROD = mpyu(ROOTLO,ROOTLO) 205 } 206 { 207 ERROR -= lsr(PROD,#33) 208 } 209 { 210 ERROR = mpyu(ERRORHI,RECIPEST) 211 } 212 { 213 ROOT += lsr(ERROR,SHIFTAMT) 214 SHIFTAMT = add(SHIFTAMT,#16) 215 ERROR = asl(FRACRAD,#47) // for next iter 216 } 217 /* Iteration 2 */ 218 { 219 PROD = mpyu(ROOTHI,ROOTHI) 220 } 221 { 222 ERROR -= asl(PROD,#47) 223 PROD = mpyu(ROOTHI,ROOTLO) 224 } 225 { 226 ERROR -= asl(PROD,#16) // bidir shr 31-47 227 PROD = mpyu(ROOTLO,ROOTLO) 228 } 229 { 230 ERROR -= lsr(PROD,#17) // 64-47 231 } 232 { 233 ERROR = mpyu(ERRORHI,RECIPEST) 234 } 235 { 236 ROOT += lsr(ERROR,SHIFTAMT) 237 } 238#undef ERROR 239#undef PROD 240#undef PRODHI 241#undef PRODLO 242#define REM_HI r15:14 243#define REM_HI_HI r15 244#define REM_LO r1:0 245#undef RECIPEST 246#undef SHIFTAMT 247#define TWOROOT_LO r9:8 248 /* Adjust Root */ 249 { 250 HL = mpyu(ROOTHI,ROOTLO) 251 LL = mpyu(ROOTLO,ROOTLO) 252 REM_HI = #0 253 REM_LO = #0 254 } 255 { 256 HL += lsr(LL,#33) 257 LL += asl(HL,#33) 258 P_CARRY0 = cmp.eq(r0,r0) 259 } 260 { 261 HH = mpyu(ROOTHI,ROOTHI) 262 REM_LO = sub(REM_LO,LL,P_CARRY0):carry 263 TWOROOT_LO = #1 264 } 265 { 266 HH += lsr(HL,#31) 267 TWOROOT_LO += asl(ROOT,#1) 268 } 269#undef HL 270#undef LL 271#define REM_HI_TMP r3:2 272#define REM_HI_TMP_HI r3 273#define REM_LO_TMP r5:4 274 { 275 REM_HI = sub(FRACRAD,HH,P_CARRY0):carry 276 REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY1):carry 277#undef FRACRAD 278#undef HH 279#define ZERO r11:10 280#define ONE r7:6 281 ONE = #1 282 ZERO = #0 283 } 284 { 285 REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY1):carry 286 ONE = add(ROOT,ONE) 287 EXP = add(EXP,#-DF_BIAS) // subtract bias --> signed exp 288 } 289 { 290 // If carry set, no borrow: result was still positive 291 if (P_CARRY1) ROOT = ONE 292 if (P_CARRY1) REM_LO = REM_LO_TMP 293 if (P_CARRY1) REM_HI = REM_HI_TMP 294 } 295 { 296 REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY2):carry 297 ONE = #1 298 EXP = asr(EXP,#1) // divide signed exp by 2 299 } 300 { 301 REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY2):carry 302 ONE = add(ROOT,ONE) 303 } 304 { 305 if (P_CARRY2) ROOT = ONE 306 if (P_CARRY2) REM_LO = REM_LO_TMP 307 // since tworoot <= 2^32, remhi must be zero 308#undef REM_HI_TMP 309#undef REM_HI_TMP_HI 310#define S_ONE r2 311#define ADJ r3 312 S_ONE = #1 313 } 314 { 315 P_TMP = cmp.eq(REM_LO,ZERO) // is the low part zero 316 if (!P_TMP.new) ROOTLO = or(ROOTLO,S_ONE) // if so, it's exact... hopefully 317 ADJ = cl0(ROOT) 318 EXP = add(EXP,#-63) 319 } 320#undef REM_LO 321#define RET r1:0 322#define RETHI r1 323 { 324 RET = convert_ud2df(ROOT) // set up mantissa, maybe set inexact flag 325 EXP = add(EXP,ADJ) // add back bias 326 } 327 { 328 RETHI += asl(EXP,#DF_MANTBITS-32) // add exponent adjust 329 jumpr r31 330 } 331#undef REM_LO_TMP 332#undef REM_HI_TMP 333#undef REM_HI_TMP_HI 334#undef REM_LO 335#undef REM_HI 336#undef TWOROOT_LO 337 338#undef RET 339#define A r1:0 340#define AH r1 341#define AL r1 342#undef S_ONE 343#define TMP r3:2 344#define TMPHI r3 345#define TMPLO r2 346#undef P_CARRY0 347#define P_NEG p1 348 349 350#define SFHALF r5 351#define SFRAD r9 352.Lsqrt_abnormal: 353 { 354 P_TMP = dfclass(A,#DFCLASS_ZERO) // zero? 355 if (P_TMP.new) jumpr:t r31 356 } 357 { 358 P_TMP = dfclass(A,#DFCLASS_NAN) 359 if (P_TMP.new) jump:nt .Lsqrt_nan 360 } 361 { 362 P_TMP = cmp.gt(AH,#-1) 363 if (!P_TMP.new) jump:nt .Lsqrt_invalid_neg 364 if (!P_TMP.new) EXP = ##0x7F800001 // sNaN 365 } 366 { 367 P_TMP = dfclass(A,#DFCLASS_INFINITE) 368 if (P_TMP.new) jumpr:nt r31 369 } 370 // If we got here, we're denormal 371 // prepare to restart 372 { 373 A = extractu(A,#DF_MANTBITS,#0) // Extract mantissa 374 } 375 { 376 EXP = add(clb(A),#-DF_EXPBITS) // how much to normalize? 377 } 378 { 379 A = asl(A,EXP) // Shift mantissa 380 EXP = sub(#1,EXP) // Form exponent 381 } 382 { 383 AH = insert(EXP,#1,#DF_MANTBITS-32) // insert lsb of exponent 384 } 385 { 386 TMP = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) // get sf value (mant+exp1) 387 SFHALF = ##0x3f000004 // form half constant 388 } 389 { 390 SFRAD = or(SFHALF,TMPLO) // form sf value 391 SFHALF = and(SFHALF,#-16) 392 jump .Ldenormal_restart // restart 393 } 394.Lsqrt_nan: 395 { 396 EXP = convert_df2sf(A) // if sNaN, get invalid 397 A = #-1 // qNaN 398 jumpr r31 399 } 400.Lsqrt_invalid_neg: 401 { 402 A = convert_sf2df(EXP) // Invalid,NaNval 403 jumpr r31 404 } 405END(__hexagon_sqrt) 406END(__hexagon_sqrtdf2) 407