1/* 2 * Included file to common source float/double checking 3 * The following macros should be defined: 4 * TYPE -- floating point type 5 * NAME -- convert a name to include the type 6 * UNS_TYPE -- type to hold TYPE as an unsigned number 7 * EXP_SIZE -- size in bits of the exponent 8 * MAN_SIZE -- size in bits of the mantissa 9 * UNS_ABS -- absolute value for UNS_TYPE 10 * FABS -- absolute value function for TYPE 11 * FMAX -- maximum function for TYPE 12 * FMIN -- minimum function for TYPE 13 * SQRT -- square root function for TYPE 14 * RMIN -- minimum random number to generate 15 * RMAX -- maximum random number to generate 16 * ASMDIV -- assembler instruction to do divide 17 * ASMSQRT -- assembler instruction to do square root 18 * BDIV -- # of bits of inaccuracy to allow for division 19 * BRSQRT -- # of bits of inaccuracy to allow for 1/sqrt 20 * INIT_DIV -- Initial values to test 1/x against 21 * INIT_RSQRT -- Initial values to test 1/sqrt(x) against 22 */ 23 24typedef union 25{ 26 UNS_TYPE i; 27 TYPE x; 28} NAME (union); 29 30/* 31 * Input/output arrays. 32 */ 33 34static NAME (union) NAME (div_input) [] __attribute__((__aligned__(32))) = INIT_DIV; 35static NAME (union) NAME (rsqrt_input)[] __attribute__((__aligned__(32))) = INIT_RSQRT; 36 37#define DIV_SIZE (sizeof (NAME (div_input)) / sizeof (TYPE)) 38#define RSQRT_SIZE (sizeof (NAME (rsqrt_input)) / sizeof (TYPE)) 39 40static TYPE NAME (div_expected)[DIV_SIZE] __attribute__((__aligned__(32))); 41static TYPE NAME (div_output) [DIV_SIZE] __attribute__((__aligned__(32))); 42 43static TYPE NAME (rsqrt_expected)[RSQRT_SIZE] __attribute__((__aligned__(32))); 44static TYPE NAME (rsqrt_output) [RSQRT_SIZE] __attribute__((__aligned__(32))); 45 46 47/* 48 * Crack a floating point number into sign bit, exponent, and mantissa. 49 */ 50 51static void 52NAME (crack) (TYPE number, unsigned int *p_sign, unsigned *p_exponent, UNS_TYPE *p_mantissa) 53{ 54 NAME (union) u; 55 UNS_TYPE bits; 56 57 u.x = number; 58 bits = u.i; 59 60 *p_sign = (unsigned int)((bits >> (EXP_SIZE + MAN_SIZE)) & 0x1); 61 *p_exponent = (unsigned int)((bits >> MAN_SIZE) & ((((UNS_TYPE)1) << EXP_SIZE) - 1)); 62 *p_mantissa = bits & ((((UNS_TYPE)1) << MAN_SIZE) - 1); 63 return; 64} 65 66 67/* 68 * Prevent optimizer from eliminating + 0.0 to remove -0.0. 69 */ 70 71volatile TYPE NAME (math_diff_0) = ((TYPE) 0.0); 72 73/* 74 * Return negative if two numbers are significanly different or return the 75 * number of bits that are different in the mantissa. 76 */ 77 78static int 79NAME (math_diff) (TYPE a, TYPE b, int bits) 80{ 81 TYPE zero = NAME (math_diff_0); 82 unsigned int sign_a, sign_b; 83 unsigned int exponent_a, exponent_b; 84 UNS_TYPE mantissa_a, mantissa_b, diff; 85 int i; 86 87 /* eliminate signed zero. */ 88 a += zero; 89 b += zero; 90 91 /* special case Nan. */ 92 if (__builtin_isnan (a)) 93 return (__builtin_isnan (b) ? 0 : -1); 94 95 if (a == b) 96 return 0; 97 98 /* special case infinity. */ 99 if (__builtin_isinf (a)) 100 return (__builtin_isinf (b) ? 0 : -1); 101 102 /* punt on denormal numbers. */ 103 if (!__builtin_isnormal (a) || !__builtin_isnormal (b)) 104 return -1; 105 106 NAME (crack) (a, &sign_a, &exponent_a, &mantissa_a); 107 NAME (crack) (b, &sign_b, &exponent_b, &mantissa_b); 108 109 /* If the sign is different, there is no hope. */ 110 if (sign_a != sign_b) 111 return -1; 112 113 /* If the exponent is off by 1, see if the values straddle the power of two, 114 and adjust things to do the mantassa check if we can. */ 115 if ((exponent_a == (exponent_b+1)) || (exponent_a == (exponent_b-1))) 116 { 117 TYPE big = FMAX (a, b); 118 TYPE small = FMIN (a, b); 119 TYPE diff = FABS (a - b); 120 unsigned int sign_big, sign_small, sign_test; 121 unsigned int exponent_big, exponent_small, exponent_test; 122 UNS_TYPE mantissa_big, mantissa_small, mantissa_test; 123 124 NAME (crack) (big, &sign_big, &exponent_big, &mantissa_big); 125 NAME (crack) (small, &sign_small, &exponent_small, &mantissa_small); 126 127 NAME (crack) (small - diff, &sign_test, &exponent_test, &mantissa_test); 128 if ((sign_test == sign_small) && (exponent_test == exponent_small)) 129 { 130 mantissa_a = mantissa_small; 131 mantissa_b = mantissa_test; 132 } 133 134 else 135 { 136 NAME (crack) (big + diff, &sign_test, &exponent_test, &mantissa_test); 137 if ((sign_test == sign_big) && (exponent_test == exponent_big)) 138 { 139 mantissa_a = mantissa_big; 140 mantissa_b = mantissa_test; 141 } 142 143 else 144 return -1; 145 } 146 } 147 148 else if (exponent_a != exponent_b) 149 return -1; 150 151 diff = UNS_ABS (mantissa_a - mantissa_b); 152 for (i = MAN_SIZE; i > 0; i--) 153 { 154 if ((diff & ((UNS_TYPE)1) << (i-1)) != 0) 155 return i; 156 } 157 158 return -1; 159} 160 161 162/* 163 * Turn off inlining to make code inspection easier. 164 */ 165 166static void NAME (asm_div) (void) __attribute__((__noinline__)); 167static void NAME (vector_div) (void) __attribute__((__noinline__)); 168static void NAME (scalar_div) (void) __attribute__((__noinline__)); 169static void NAME (asm_rsqrt) (void) __attribute__((__noinline__)); 170static void NAME (vector_rsqrt) (void) __attribute__((__noinline__)); 171static void NAME (scalar_rsqrt) (void) __attribute__((__noinline__)); 172static void NAME (check_div) (const char *) __attribute__((__noinline__)); 173static void NAME (check_rsqrt) (const char *) __attribute__((__noinline__)); 174static void NAME (run) (void) __attribute__((__noinline__)); 175 176 177/* 178 * Division function that might be vectorized. 179 */ 180 181static void 182NAME (vector_div) (void) 183{ 184 size_t i; 185 186 for (i = 0; i < DIV_SIZE; i++) 187 NAME (div_output)[i] = ((TYPE) 1.0) / NAME (div_input)[i].x; 188} 189 190/* 191 * Division function that is not vectorized. 192 */ 193 194static void 195NAME (scalar_div) (void) 196{ 197 size_t i; 198 199 for (i = 0; i < DIV_SIZE; i++) 200 { 201 TYPE x = ((TYPE) 1.0) / NAME (div_input)[i].x; 202 TYPE y; 203 __asm__ ("" : "=d" (y) : "0" (x)); 204 NAME (div_output)[i] = y; 205 } 206} 207 208/* 209 * Generate the division instruction via asm. 210 */ 211 212static void 213NAME (asm_div) (void) 214{ 215 size_t i; 216 217 for (i = 0; i < DIV_SIZE; i++) 218 { 219 TYPE x; 220 __asm__ (ASMDIV " %0,%1,%2" 221 : "=d" (x) 222 : "d" ((TYPE) 1.0), "d" (NAME (div_input)[i].x)); 223 NAME (div_expected)[i] = x; 224 } 225} 226 227/* 228 * Reciprocal square root function that might be vectorized. 229 */ 230 231static void 232NAME (vector_rsqrt) (void) 233{ 234 size_t i; 235 236 for (i = 0; i < RSQRT_SIZE; i++) 237 NAME (rsqrt_output)[i] = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x); 238} 239 240/* 241 * Reciprocal square root function that is not vectorized. 242 */ 243 244static void 245NAME (scalar_rsqrt) (void) 246{ 247 size_t i; 248 249 for (i = 0; i < RSQRT_SIZE; i++) 250 { 251 TYPE x = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x); 252 TYPE y; 253 __asm__ ("" : "=d" (y) : "0" (x)); 254 NAME (rsqrt_output)[i] = y; 255 } 256} 257 258/* 259 * Generate the 1/sqrt instructions via asm. 260 */ 261 262static void 263NAME (asm_rsqrt) (void) 264{ 265 size_t i; 266 267 for (i = 0; i < RSQRT_SIZE; i++) 268 { 269 TYPE x; 270 TYPE y; 271 __asm__ (ASMSQRT " %0,%1" : "=d" (x) : "d" (NAME (rsqrt_input)[i].x)); 272 __asm__ (ASMDIV " %0,%1,%2" : "=d" (y) : "d" ((TYPE) 1.0), "d" (x)); 273 NAME (rsqrt_expected)[i] = y; 274 } 275} 276 277 278/* 279 * Functions to abort or report errors. 280 */ 281 282static int NAME (error_count) = 0; 283 284#ifdef VERBOSE 285static int NAME (max_bits_div) = 0; 286static int NAME (max_bits_rsqrt) = 0; 287#endif 288 289 290/* 291 * Compare the expected value with the value we got. 292 */ 293 294static void 295NAME (check_div) (const char *test) 296{ 297 size_t i; 298 int b; 299 300 for (i = 0; i < DIV_SIZE; i++) 301 { 302 TYPE exp = NAME (div_expected)[i]; 303 TYPE out = NAME (div_output)[i]; 304 b = NAME (math_diff) (exp, out, BDIV); 305 306#ifdef VERBOSE 307 if (b != 0) 308 { 309 NAME (union) u_in = NAME (div_input)[i]; 310 NAME (union) u_exp; 311 NAME (union) u_out; 312 char explanation[64]; 313 const char *p_exp; 314 315 if (b < 0) 316 p_exp = "failed"; 317 else 318 { 319 p_exp = explanation; 320 sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : ""); 321 } 322 323 u_exp.x = exp; 324 u_out.x = out; 325 printf ("%s %s %s for 1.0 / %g [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n", 326 TNAME (TYPE), test, p_exp, 327 (double) u_in.x, (unsigned long long) u_in.i, 328 (double) exp, (unsigned long long) u_exp.i, 329 (double) out, (unsigned long long) u_out.i); 330 } 331#endif 332 333 if (b < 0 || b > BDIV) 334 NAME (error_count)++; 335 336#ifdef VERBOSE 337 if (b > NAME (max_bits_div)) 338 NAME (max_bits_div) = b; 339#endif 340 } 341} 342 343static void 344NAME (check_rsqrt) (const char *test) 345{ 346 size_t i; 347 int b; 348 349 for (i = 0; i < RSQRT_SIZE; i++) 350 { 351 TYPE exp = NAME (rsqrt_expected)[i]; 352 TYPE out = NAME (rsqrt_output)[i]; 353 b = NAME (math_diff) (exp, out, BRSQRT); 354 355#ifdef VERBOSE 356 if (b != 0) 357 { 358 NAME (union) u_in = NAME (rsqrt_input)[i]; 359 NAME (union) u_exp; 360 NAME (union) u_out; 361 char explanation[64]; 362 const char *p_exp; 363 364 if (b < 0) 365 p_exp = "failed"; 366 else 367 { 368 p_exp = explanation; 369 sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : ""); 370 } 371 372 u_exp.x = exp; 373 u_out.x = out; 374 printf ("%s %s %s for 1 / sqrt (%g) [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n", 375 TNAME (TYPE), test, p_exp, 376 (double) u_in.x, (unsigned long long) u_in.i, 377 (double) exp, (unsigned long long) u_exp.i, 378 (double) out, (unsigned long long) u_out.i); 379 } 380#endif 381 382 if (b < 0 || b > BRSQRT) 383 NAME (error_count)++; 384 385#ifdef VERBOSE 386 if (b > NAME (max_bits_rsqrt)) 387 NAME (max_bits_rsqrt) = b; 388#endif 389 } 390} 391 392 393/* 394 * Now do everything. 395 */ 396 397static void 398NAME (run) (void) 399{ 400#ifdef VERBOSE 401 printf ("start run_%s, divide size = %ld, rsqrt size = %ld, %d bit%s for a/b, %d bit%s for 1/sqrt(a)\n", 402 TNAME (TYPE), 403 (long)DIV_SIZE, 404 (long)RSQRT_SIZE, 405 BDIV, (BDIV == 1) ? "" : "s", 406 BRSQRT, (BRSQRT == 1) ? "" : "s"); 407#endif 408 409 NAME (asm_div) (); 410 411 NAME (scalar_div) (); 412 NAME (check_div) ("scalar"); 413 414 NAME (vector_div) (); 415 NAME (check_div) ("vector"); 416 417 NAME (asm_rsqrt) (); 418 419 NAME (scalar_rsqrt) (); 420 NAME (check_rsqrt) ("scalar"); 421 422 NAME (vector_rsqrt) (); 423 NAME (check_rsqrt) ("vector"); 424 425#ifdef VERBOSE 426 printf ("end run_%s, errors = %d, max div bits = %d, max rsqrt bits = %d\n", 427 TNAME (TYPE), 428 NAME (error_count), 429 NAME (max_bits_div), 430 NAME (max_bits_rsqrt)); 431#endif 432} 433