// dfaddsub.S revision 337136
//===----------------------Hexagon builtin routine ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

/* Double Precision Add / Subtract */

/*
 * Entry points:
 *   __hexagon_adddf3 : A (r1:0) + B (r3:2), result returned in A (r1:0)
 *   __hexagon_subdf3 : flips the sign bit of B and continues in the add path
 *
 * Outline of the add path, as implemented below:
 *   1. Extract both exponent fields; if either operand is not "normal"
 *      (dfclass #2), branch to .Ladd_abnormal.
 *   2. Make A the operand with the larger exponent.
 *   3. Place both mantissas in a Q1.62 fixed-point form, negate according to
 *      sign, shift B right by the exponent difference (collapsing shifted-out
 *      bits into a sticky bit), and add.
 *   4. Convert the 64-bit integer sum back with convert_d2df and patch the
 *      exponent.  Exponents outside the safe window go through
 *      .Ladd_ovf_unf, which handles overflow and denormal results.
 */

/* Operand / result register pairs */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2

/* Exponent fields of A and B */
#define EXPA r4
#define EXPB r5
#define EXPB_A r5:4

/* Scratch register pairs */
#define ZTMP r7:6
#define ZTMPH r7
#define ZTMPL r6

#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12

#define BTMP r9:8
#define BTMPH r9
#define BTMPL r8

#define ATMP2 r11:10
#define ATMP2H r11
#define ATMP2L r10

/* r15:14 is used as an (amount:offset) pair for extractu, later reused as ZERO */
#define EXPDIFF r15
#define EXTRACTOFF r14
#define EXTRACTAMT r15:14

#define TMP r28

#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
#define SR_BIT_INEXACT 5

/* Bit offset of the 2-bit rounding-mode field in USR; may be overridden. */
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif

#define NORMAL p3
#define BIGB p2

#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG

	.text
	.global __hexagon_adddf3
	.global __hexagon_subdf3
	.type __hexagon_adddf3, @function
	.type __hexagon_subdf3, @function

Q6_ALIAS(adddf3)
FAST_ALIAS(adddf3)
FAST2_ALIAS(adddf3)
Q6_ALIAS(subdf3)
FAST_ALIAS(subdf3)
FAST2_ALIAS(subdf3)

	.p2align 5
__hexagon_adddf3:
	{
		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
		EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
		ATMP = combine(##0x20000000,#0)		// template with bit 61 set; mantissas are inserted below it
	}
	{
		// Both dfclass results target the same predicate in one packet.
		NORMAL = dfclass(A,#2)
		NORMAL = dfclass(B,#2)
		BTMP = ATMP
		BIGB = cmp.gtu(EXPB,EXPA)		// Is B substantially greater than A?
	}
	{
		if (!NORMAL) jump .Ladd_abnormal	// If abnormal, go to special code
		if (BIGB) A = B				// if B >> A, swap A and B
		if (BIGB) B = A				// If B >> A, swap A and B
		if (BIGB) EXPB_A = combine(EXPA,EXPB)	// swap exponents
	}
	{
		ATMP = insert(A,#MANTBITS,#EXPBITS-2)	// Q1.62
		BTMP = insert(B,#MANTBITS,#EXPBITS-2)	// Q1.62
		EXPDIFF = sub(EXPA,EXPB)
		ZTMP = combine(#62,#1)			// ZTMPH = max shift (62), ZTMPL = sticky bit (1)
	}
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
	// Shared tail: also entered from .Ladd_abnormal (normal + subnormal case)
	// with ATMP/BTMP, EXPA/EXPB, EXPDIFF and ZTMP prepared the same way.
.Ladd_continue:
	{
		EXPDIFF = min(EXPDIFF,ZTMPH)		// If exponent difference >= ~60,
							// will collapse to sticky bit
		ATMP2 = neg(ATMP)
		A_POS = cmp.gt(AH,#-1)
		EXTRACTOFF = #0
	}
	{
		if (!A_POS) ATMP = ATMP2
		ATMP2 = extractu(BTMP,EXTRACTAMT)	// the low EXPDIFF bits of BTMP that the shift discards
		BTMP = ASR(BTMP,EXPDIFF)
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
#define ZERO r15:14
		ZERO = #0
	}
	{
		NO_STICKIES = cmp.eq(ATMP2,ZERO)
		if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)	// fold discarded bits into a sticky bit
		EXPB = add(EXPA,#-BIAS-60)		// exponent adjustment applied after convert_d2df
		B_POS = cmp.gt(BH,#-1)
	}
	{
		ATMP = add(ATMP,BTMP)			// ADD!!!
		ATMP2 = sub(ATMP,BTMP)			// Negate and ADD --> SUB!!!
		ZTMP = combine(#54,##2045)		// ZTMPH = 54, ZTMPL = 2045: exponent window bounds
	}
	{
		p0 = cmp.gtu(EXPA,ZTMPH)		// must be pretty high in case of large cancellation
		p0 = !cmp.gtu(EXPA,ZTMPL)
		if (!p0.new) jump:nt .Ladd_ovf_unf
		if (!B_POS) ATMP = ATMP2		// if B neg, pick difference
	}
	{
		A = convert_d2df(ATMP)			// Convert to Double Precision, taking care of flags, etc. So nice!
		p0 = cmp.eq(ATMPH,#0)
		p0 = cmp.eq(ATMPL,#0)
		if (p0.new) jump:nt .Ladd_zero		// or maybe conversion handles zero case correctly?
	}
	{
		AH += asl(EXPB,#HI_MANTBITS)		// apply the exponent adjustment to the result
		jumpr r31
	}
	.falign
__hexagon_subdf3:
	// A - B is computed as A + (-B): flip B's sign bit and reuse the add path.
	{
		BH = togglebit(BH,#31)
		jump __qdsp_adddf3
	}


	.falign
.Ladd_zero:
	// True zero, full cancellation
	// +0 unless round towards negative infinity
	{
		TMP = USR
		A = #0
		BH = #1
	}
	{
		// Use SR_ROUND_OFF (not a literal 22) so this path agrees with
		// .Ladd_ovf and .Lzero_plus_zero when SR_ROUND_OFF is overridden.
		TMP = extractu(TMP,#2,#SR_ROUND_OFF)
		BH = asl(BH,#31)			// BH = sign-bit mask
	}
	{
		p0 = cmp.eq(TMP,#2)
		if (p0.new) AH = xor(AH,BH)		// set the sign bit -> -0
		jumpr r31
	}
	.falign
.Ladd_ovf_unf:
	// Overflow or Denormal is possible
	// Good news: Underflow flag is not possible!
	/*
	 * ATMP has 2's complement value
	 *
	 * EXPA has A's exponent, EXPB has EXPA-BIAS-60
	 *
	 * Convert, extract exponent, add adjustment.
	 * If > 2046, overflow
	 * If <= 0, denormal
	 *
	 * Note that we've not done our zero check yet, so do that too
	 *
	 */
	{
		A = convert_d2df(ATMP)
		p0 = cmp.eq(ATMPH,#0)
		p0 = cmp.eq(ATMPL,#0)
		if (p0.new) jump:nt .Ladd_zero
	}
	{
		TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)	// exponent produced by the conversion
		AH += asl(EXPB,#HI_MANTBITS)
	}
	{
		EXPB = add(EXPB,TMP)			// final (adjusted) exponent
		B = combine(##0x00100000,#0)		// bit 52 set; mantissa is inserted below it
	}
	{
		p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
		if (p0.new) jump:nt .Ladd_ovf
	}
	{
		p0 = cmp.gt(EXPB,#0)
		if (p0.new) jumpr:t r31			// exponent in range: A already holds the result
		TMP = sub(#1,EXPB)			// denormal: right-shift amount
	}
	{
		B = insert(A,#MANTBITS,#0)		// bit 52 plus the converted mantissa
		A = ATMP				// 2's complement value, kept for its sign bit
	}
	{
		B = lsr(B,TMP)
	}
	{
		A = insert(B,#63,#0)			// replace low 63 bits, leave sign
		jumpr r31
	}
	.falign
.Ladd_ovf:
	// We get either max finite value or infinity. Either way, overflow+inexact
	{
		A = ATMP				// 2's complement value
		TMP = USR
		ATMP = combine(##0x7fefffff,#-1)	// positive max finite
	}
	{
		EXPB = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding bits
		TMP = or(TMP,#0x28)			// inexact + overflow
		BTMP = combine(##0x7ff00000,#0)		// positive infinity
	}
	{
		USR = TMP
		EXPB ^= lsr(AH,#31)			// Does sign match rounding?
		TMP = EXPB				// unmodified rounding mode
	}
	{
		p0 = !cmp.eq(TMP,#1)			// If not round-to-zero and
		p0 = !cmp.eq(EXPB,#2)			// Not rounding the other way,
		if (p0.new) ATMP = BTMP			// we should get infinity
	}
	{
		A = insert(ATMP,#63,#0)			// insert inf/maxfinite, leave sign
	}
	{
		p0 = dfcmp.eq(A,A)
		jumpr r31
	}

	// At least one operand is not a normal number.
.Ladd_abnormal:
	{
		ATMP = extractu(A,#63,#0)		// strip off sign
		BTMP = extractu(B,#63,#0)		// strip off sign
	}
	{
		p3 = cmp.gtu(ATMP,BTMP)
		if (!p3.new) A = B			// sort values
		if (!p3.new) B = A			// sort values
	}
	{
		// Any NaN --> NaN, possibly raise invalid if sNaN
		p0 = dfclass(A,#0x0f)			// A not NaN?
		if (!p0.new) jump:nt .Linvalid_nan_add
		if (!p3) ATMP = BTMP			// keep the magnitudes paired with the swapped operands
		if (!p3) BTMP = ATMP
	}
	{
		// Infinity + non-infinity number is infinity
		// Infinity + infinity --> inf or nan
		p1 = dfclass(A,#0x08)			// A is infinity
		if (p1.new) jump:nt .Linf_add
	}
	{
		p2 = dfclass(B,#0x01)			// B is zero
		if (p2.new) jump:nt .LB_zero		// so return A or special 0+0
		ATMP = #0				// .LB_zero compares A against this zero
	}
	// We are left with adding one or more subnormals
	{
		p0 = dfclass(A,#4)
		if (p0.new) jump:nt .Ladd_two_subnormal
		ATMP = combine(##0x20000000,#0)
	}
	// A is normal, B is subnormal: set up as in the main path and rejoin it.
	{
		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
		EXPB = #1
		// BTMP already ABS(B)
		BTMP = asl(BTMP,#EXPBITS-2)
	}
#undef ZERO
#define EXTRACTOFF r14
#define EXPDIFF r15
	{
		ATMP = insert(A,#MANTBITS,#EXPBITS-2)
		EXPDIFF = sub(EXPA,EXPB)
		ZTMP = combine(#62,#1)
		jump .Ladd_continue
	}

	// Both operands subnormal: add as signed 64-bit integers, then rebuild
	// the result as sign + magnitude.
.Ladd_two_subnormal:
	{
		ATMP = extractu(A,#63,#0)
		BTMP = extractu(B,#63,#0)
	}
	{
		ATMP = neg(ATMP)
		BTMP = neg(BTMP)
		p0 = cmp.gt(AH,#-1)
		p1 = cmp.gt(BH,#-1)
	}
	{
		if (p0) ATMP = A
		if (p1) BTMP = B
	}
	{
		ATMP = add(ATMP,BTMP)
	}
	{
		BTMP = neg(ATMP)
		p0 = cmp.gt(ATMPH,#-1)
		B = #0
	}
	{
		if (!p0) A = BTMP
		if (p0) A = ATMP
		BH = ##0x80000000
	}
	{
		if (!p0) AH = or(AH,BH)
		p0 = dfcmp.eq(A,B)			// B is now -0.0
		if (p0.new) jump:nt .Lzero_plus_zero
	}
	{
		jumpr r31
	}

.Linvalid_nan_add:
	{
		TMP = convert_df2sf(A)			// will generate invalid if sNaN
		p0 = dfclass(B,#0x0f)			// if B is not NaN
		if (p0.new) B = A			// make it whatever A is
	}
	{
		BL = convert_df2sf(B)			// will generate invalid if sNaN
		A = #-1					// result: all bits set
		jumpr r31
	}
	.falign
.LB_zero:
	{
		p0 = dfcmp.eq(ATMP,A)			// is A also zero?
		if (!p0.new) jumpr:t r31		// If not, just return A
	}
	// 0 + 0 is special
	// if equal integral values, they have the same sign, which is fine for all rounding
	// modes.
	// If unequal in sign, we get +0 for all rounding modes except round down
.Lzero_plus_zero:
	{
		p0 = cmp.eq(A,B)
		if (p0.new) jumpr:t r31
	}
	{
		TMP = USR
	}
	{
		TMP = extractu(TMP,#2,#SR_ROUND_OFF)
		A = #0
	}
	{
		p0 = cmp.eq(TMP,#2)
		if (p0.new) AH = ##0x80000000
		jumpr r31
	}
.Linf_add:
	// adding infinities is only OK if they are equal
	{
		p0 = !cmp.eq(AH,BH)			// Do they have different signs
		p0 = dfclass(B,#8)			// And is B also infinite?
		if (!p0.new) jumpr:t r31		// If not, just a normal inf
	}
	{
		BL = ##0x7f800001			// sNAN
	}
	{
		A = convert_sf2df(BL)			// trigger invalid, set NaN
		jumpr r31
	}
END(__hexagon_adddf3)