1/*
2 * Included file to common source float/double checking
3 * The following macros should be defined:
4 *	TYPE	   -- floating point type
5 *	NAME	   -- convert a name to include the type
6 *	UNS_TYPE   -- type to hold TYPE as an unsigned number
7 *	EXP_SIZE   -- size in bits of the exponent
8 *	MAN_SIZE   -- size in bits of the mantissa
9 *	UNS_ABS	   -- absolute value for UNS_TYPE
10 *	FABS	   -- absolute value function for TYPE
11 *	FMAX	   -- maximum function for TYPE
12 *	FMIN	   -- minimum function for TYPE
13 *	SQRT	   -- square root function for TYPE
14 *	RMIN	   -- minimum random number to generate
15 *	RMAX	   -- maximum random number to generate
16 *	ASMDIV	   -- assembler instruction to do divide
17 *	ASMSQRT	   -- assembler instruction to do square root
18 *	BDIV	   -- # of bits of inaccuracy to allow for division
19 *	BRSQRT	   -- # of bits of inaccuracy to allow for 1/sqrt
20 *	INIT_DIV   -- Initial values to test 1/x against
21 *	INIT_RSQRT -- Initial values to test 1/sqrt(x) against
22 */
23
24typedef union
25{
26  UNS_TYPE i;
27  TYPE x;
28} NAME (union);
29
30/*
31 * Input/output arrays.
32 */
33
34static NAME (union) NAME (div_input)  [] __attribute__((__aligned__(32))) = INIT_DIV;
35static NAME (union) NAME (rsqrt_input)[] __attribute__((__aligned__(32))) = INIT_RSQRT;
36
37#define DIV_SIZE   (sizeof (NAME (div_input))   / sizeof (TYPE))
38#define RSQRT_SIZE (sizeof (NAME (rsqrt_input)) / sizeof (TYPE))
39
40static TYPE NAME (div_expected)[DIV_SIZE] __attribute__((__aligned__(32)));
41static TYPE NAME (div_output)  [DIV_SIZE] __attribute__((__aligned__(32)));
42
43static TYPE NAME (rsqrt_expected)[RSQRT_SIZE] __attribute__((__aligned__(32)));
44static TYPE NAME (rsqrt_output)  [RSQRT_SIZE] __attribute__((__aligned__(32)));
45
46
47/*
48 * Crack a floating point number into sign bit, exponent, and mantissa.
49 */
50
51static void
52NAME (crack) (TYPE number, unsigned int *p_sign, unsigned *p_exponent, UNS_TYPE *p_mantissa)
53{
54  NAME (union) u;
55  UNS_TYPE bits;
56
57  u.x = number;
58  bits = u.i;
59
60  *p_sign = (unsigned int)((bits >> (EXP_SIZE + MAN_SIZE)) & 0x1);
61  *p_exponent = (unsigned int)((bits >> MAN_SIZE) & ((((UNS_TYPE)1) << EXP_SIZE) - 1));
62  *p_mantissa = bits & ((((UNS_TYPE)1) << MAN_SIZE) - 1);
63  return;
64}
65
66
67/*
68 * Prevent optimizer from eliminating + 0.0 to remove -0.0.
69 */
70
71volatile TYPE NAME (math_diff_0) = ((TYPE) 0.0);
72
73/*
74 * Return negative if two numbers are significanly different or return the
75 * number of bits that are different in the mantissa.
76 */
77
78static int
79NAME (math_diff) (TYPE a, TYPE b, int bits)
80{
81  TYPE zero = NAME (math_diff_0);
82  unsigned int sign_a, sign_b;
83  unsigned int exponent_a, exponent_b;
84  UNS_TYPE mantissa_a, mantissa_b, diff;
85  int i;
86
87  /* eliminate signed zero.  */
88  a += zero;
89  b += zero;
90
91  /* special case Nan.  */
92  if (__builtin_isnan (a))
93    return (__builtin_isnan (b) ? 0 : -1);
94
95  if (a == b)
96    return 0;
97
98  /* special case infinity.  */
99  if (__builtin_isinf (a))
100    return (__builtin_isinf (b) ? 0 : -1);
101
102  /* punt on denormal numbers.  */
103  if (!__builtin_isnormal (a) || !__builtin_isnormal (b))
104    return -1;
105
106  NAME (crack) (a, &sign_a, &exponent_a, &mantissa_a);
107  NAME (crack) (b, &sign_b, &exponent_b, &mantissa_b);
108
109  /* If the sign is different, there is no hope.  */
110  if (sign_a != sign_b)
111    return -1;
112
113  /* If the exponent is off by 1, see if the values straddle the power of two,
114     and adjust things to do the mantassa check if we can.  */
115  if ((exponent_a == (exponent_b+1)) || (exponent_a == (exponent_b-1)))
116    {
117      TYPE big = FMAX (a, b);
118      TYPE small = FMIN (a, b);
119      TYPE diff = FABS (a - b);
120      unsigned int sign_big, sign_small, sign_test;
121      unsigned int exponent_big, exponent_small, exponent_test;
122      UNS_TYPE mantissa_big, mantissa_small, mantissa_test;
123
124      NAME (crack) (big, &sign_big, &exponent_big, &mantissa_big);
125      NAME (crack) (small, &sign_small, &exponent_small, &mantissa_small);
126
127      NAME (crack) (small - diff, &sign_test, &exponent_test, &mantissa_test);
128      if ((sign_test == sign_small) && (exponent_test == exponent_small))
129	{
130	  mantissa_a = mantissa_small;
131	  mantissa_b = mantissa_test;
132	}
133
134      else
135	{
136	  NAME (crack) (big + diff, &sign_test, &exponent_test, &mantissa_test);
137	  if ((sign_test == sign_big) && (exponent_test == exponent_big))
138	    {
139	      mantissa_a = mantissa_big;
140	      mantissa_b = mantissa_test;
141	    }
142
143	  else
144	    return -1;
145	}
146    }
147
148  else if (exponent_a != exponent_b)
149    return -1;
150
151  diff = UNS_ABS (mantissa_a - mantissa_b);
152  for (i = MAN_SIZE; i > 0; i--)
153    {
154      if ((diff & ((UNS_TYPE)1) << (i-1)) != 0)
155	return i;
156    }
157
158  return -1;
159}
160
161
162/*
163 * Turn off inlining to make code inspection easier.
164 */
165
166static void NAME (asm_div) (void) __attribute__((__noinline__));
167static void NAME (vector_div) (void) __attribute__((__noinline__));
168static void NAME (scalar_div) (void) __attribute__((__noinline__));
169static void NAME (asm_rsqrt) (void) __attribute__((__noinline__));
170static void NAME (vector_rsqrt) (void) __attribute__((__noinline__));
171static void NAME (scalar_rsqrt) (void) __attribute__((__noinline__));
172static void NAME (check_div) (const char *) __attribute__((__noinline__));
173static void NAME (check_rsqrt) (const char *) __attribute__((__noinline__));
174static void NAME (run) (void) __attribute__((__noinline__));
175
176
177/*
178 * Division function that might be vectorized.
179 */
180
181static void
182NAME (vector_div) (void)
183{
184  size_t i;
185
186  for (i = 0; i < DIV_SIZE; i++)
187    NAME (div_output)[i] = ((TYPE) 1.0) / NAME (div_input)[i].x;
188}
189
190/*
191 * Division function that is not vectorized.
192 */
193
194static void
195NAME (scalar_div) (void)
196{
197  size_t i;
198
199  for (i = 0; i < DIV_SIZE; i++)
200    {
201      TYPE x = ((TYPE) 1.0) / NAME (div_input)[i].x;
202      TYPE y;
203      __asm__ ("" : "=d" (y) : "0" (x));
204      NAME (div_output)[i] = y;
205    }
206}
207
208/*
209 * Generate the division instruction via asm.
210 */
211
212static void
213NAME (asm_div) (void)
214{
215  size_t i;
216
217  for (i = 0; i < DIV_SIZE; i++)
218    {
219      TYPE x;
220      __asm__ (ASMDIV " %0,%1,%2"
221	       : "=d" (x)
222	       : "d" ((TYPE) 1.0), "d" (NAME (div_input)[i].x));
223      NAME (div_expected)[i] = x;
224    }
225}
226
227/*
228 * Reciprocal square root function that might be vectorized.
229 */
230
231static void
232NAME (vector_rsqrt) (void)
233{
234  size_t i;
235
236  for (i = 0; i < RSQRT_SIZE; i++)
237    NAME (rsqrt_output)[i] = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x);
238}
239
240/*
241 * Reciprocal square root function that is not vectorized.
242 */
243
244static void
245NAME (scalar_rsqrt) (void)
246{
247  size_t i;
248
249  for (i = 0; i < RSQRT_SIZE; i++)
250    {
251      TYPE x = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x);
252      TYPE y;
253      __asm__ ("" : "=d" (y) : "0" (x));
254      NAME (rsqrt_output)[i] = y;
255    }
256}
257
258/*
259 * Generate the 1/sqrt instructions via asm.
260 */
261
262static void
263NAME (asm_rsqrt) (void)
264{
265  size_t i;
266
267  for (i = 0; i < RSQRT_SIZE; i++)
268    {
269      TYPE x;
270      TYPE y;
271      __asm__ (ASMSQRT " %0,%1" : "=d" (x) : "d" (NAME (rsqrt_input)[i].x));
272      __asm__ (ASMDIV " %0,%1,%2" : "=d" (y) : "d" ((TYPE) 1.0), "d" (x));
273      NAME (rsqrt_expected)[i] = y;
274    }
275}
276
277
278/*
279 * Functions to abort or report errors.
280 */
281
282static int NAME (error_count) = 0;
283
284#ifdef VERBOSE
285static int NAME (max_bits_div)   = 0;
286static int NAME (max_bits_rsqrt) = 0;
287#endif
288
289
290/*
291 * Compare the expected value with the value we got.
292 */
293
294static void
295NAME (check_div) (const char *test)
296{
297  size_t i;
298  int b;
299
300  for (i = 0; i < DIV_SIZE; i++)
301    {
302      TYPE exp = NAME (div_expected)[i];
303      TYPE out = NAME (div_output)[i];
304      b = NAME (math_diff) (exp, out, BDIV);
305
306#ifdef VERBOSE
307      if (b != 0)
308	{
309	  NAME (union) u_in = NAME (div_input)[i];
310	  NAME (union) u_exp;
311	  NAME (union) u_out;
312	  char explanation[64];
313	  const char *p_exp;
314
315	  if (b < 0)
316	    p_exp = "failed";
317	  else
318	    {
319	      p_exp = explanation;
320	      sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : "");
321	    }
322
323	  u_exp.x = exp;
324	  u_out.x = out;
325	  printf ("%s %s %s for 1.0 / %g [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n",
326		  TNAME (TYPE), test, p_exp,
327		  (double) u_in.x, (unsigned long long) u_in.i,
328		  (double) exp,    (unsigned long long) u_exp.i,
329		  (double) out,    (unsigned long long) u_out.i);
330	}
331#endif
332
333      if (b < 0 || b > BDIV)
334	NAME (error_count)++;
335
336#ifdef VERBOSE
337      if (b > NAME (max_bits_div))
338	NAME (max_bits_div) = b;
339#endif
340    }
341}
342
343static void
344NAME (check_rsqrt) (const char *test)
345{
346  size_t i;
347  int b;
348
349  for (i = 0; i < RSQRT_SIZE; i++)
350    {
351      TYPE exp = NAME (rsqrt_expected)[i];
352      TYPE out = NAME (rsqrt_output)[i];
353      b = NAME (math_diff) (exp, out, BRSQRT);
354
355#ifdef VERBOSE
356      if (b != 0)
357	{
358	  NAME (union) u_in = NAME (rsqrt_input)[i];
359	  NAME (union) u_exp;
360	  NAME (union) u_out;
361	  char explanation[64];
362	  const char *p_exp;
363
364	  if (b < 0)
365	    p_exp = "failed";
366	  else
367	    {
368	      p_exp = explanation;
369	      sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : "");
370	    }
371
372	  u_exp.x = exp;
373	  u_out.x = out;
374	  printf ("%s %s %s for 1 / sqrt (%g) [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n",
375		  TNAME (TYPE), test, p_exp,
376		  (double) u_in.x, (unsigned long long) u_in.i,
377		  (double) exp,    (unsigned long long) u_exp.i,
378		  (double) out,    (unsigned long long) u_out.i);
379	}
380#endif
381
382      if (b < 0 || b > BRSQRT)
383	NAME (error_count)++;
384
385#ifdef VERBOSE
386      if (b > NAME (max_bits_rsqrt))
387	NAME (max_bits_rsqrt) = b;
388#endif
389    }
390}
391
392
393/*
394 * Now do everything.
395 */
396
397static void
398NAME (run) (void)
399{
400#ifdef VERBOSE
401  printf ("start run_%s, divide size = %ld, rsqrt size = %ld, %d bit%s for a/b, %d bit%s for 1/sqrt(a)\n",
402	  TNAME (TYPE),
403	  (long)DIV_SIZE,
404	  (long)RSQRT_SIZE,
405	  BDIV, (BDIV == 1) ? "" : "s",
406	  BRSQRT, (BRSQRT == 1) ? "" : "s");
407#endif
408
409  NAME (asm_div) ();
410
411  NAME (scalar_div) ();
412  NAME (check_div) ("scalar");
413
414  NAME (vector_div) ();
415  NAME (check_div) ("vector");
416
417  NAME (asm_rsqrt) ();
418
419  NAME (scalar_rsqrt) ();
420  NAME (check_rsqrt) ("scalar");
421
422  NAME (vector_rsqrt) ();
423  NAME (check_rsqrt) ("vector");
424
425#ifdef VERBOSE
426  printf ("end run_%s, errors = %d, max div bits = %d, max rsqrt bits = %d\n",
427	  TNAME (TYPE),
428	  NAME (error_count),
429	  NAME (max_bits_div),
430	  NAME (max_bits_rsqrt));
431#endif
432}
433