1302329Sbapt#include <smmintrin.h>
2302329Sbapt#include <math.h>
3302329Sbapt
4302329Sbapt#define NUM 64
5302329Sbapt
6302329Sbaptstatic void
7302329Sbaptinit_round (FP_T *src)
8302329Sbapt{
9302329Sbapt  int i, sign = 1;
10302329Sbapt  FP_T f = rand ();
11302329Sbapt
12302329Sbapt  for (i = 0; i < NUM; i++)
13302329Sbapt    {
14302329Sbapt      src[i] = (i + 1)* f * M_PI * sign;
15302329Sbapt      if (i < (NUM / 2))
16302329Sbapt	{
17302329Sbapt          if ((i % 6) == 0)
18302329Sbapt	    f = f * src[i];
19302329Sbapt        }
20302329Sbapt      else if (i == (NUM / 2))
21302329Sbapt	f = rand ();
22302329Sbapt      else if ((i % 6) == 0)
23302329Sbapt	f = 1 / (f * (i + 1) * src[i] * M_PI *sign);
24302329Sbapt      sign = -sign;
25302329Sbapt    }
26302329Sbapt}
27302329Sbapt
28302329Sbaptstatic FP_T
29302329Sbaptdo_round (FP_T f, int type)
30302329Sbapt{
31302329Sbapt  short saved_cw, new_cw, clr_mask;
32302329Sbapt  FP_T ret;
33302329Sbapt
34302329Sbapt  if ((type & 4))
35302329Sbapt    {
36302329Sbapt      type = 0;
37302329Sbapt      clr_mask = 0xFFFF;
38302329Sbapt    }
39302329Sbapt  else
40302329Sbapt    {
41302329Sbapt      type = 0x003F | ((type & 3) << 10);
42302329Sbapt      clr_mask = ~0x0C3F;
43302329Sbapt    }
44302329Sbapt
45302329Sbapt  __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f));
46302329Sbapt
47302329Sbapt  __asm__ ("fstcw %0" : "=m" (*&saved_cw));
48302329Sbapt  new_cw = saved_cw & clr_mask;
49302329Sbapt  new_cw |= type;
50302329Sbapt  __asm__ ("fldcw %0" : : "m" (*&new_cw));
51302329Sbapt
52302329Sbapt  __asm__ ("frndint\n"
53302329Sbapt	   "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret));
54302329Sbapt  __asm__ ("fldcw %0" : : "m" (*&saved_cw));
55302329Sbapt  return ret;
56302329Sbapt}
57302329Sbapt
58302329Sbaptstatic void
59302329Sbaptsse4_1_test (void)
60302329Sbapt{
61302329Sbapt  int i;
62302329Sbapt  FP_T f;
63302329Sbapt  union
64302329Sbapt    {
65302329Sbapt      VEC_T x[NUM / LOOP_INCREMENT];
66302329Sbapt      FP_T f[NUM];
67302329Sbapt    } dst, src;
68302329Sbapt
69302329Sbapt  init_round (src.f);
70302329Sbapt
71302329Sbapt  for (i = 0; i < NUM / LOOP_INCREMENT; i++)
72302329Sbapt    dst.x[i] =  ROUND_INTRIN (src.x[i], ROUND_MODE);
73302329Sbapt
74302329Sbapt  for (i = 0; i < NUM; i += CHECK_LOOP_INCREMENT)
75302329Sbapt    {
76302329Sbapt      f = do_round (src.f[i], CHECK_ROUND_MODE);
77302329Sbapt     if (f != dst.f[i])
78302329Sbapt       abort ();
79302329Sbapt    }
80302329Sbapt
81302329Sbapt  if (_MM_FROUND_TO_NEAREST_INT != 0x00
82302329Sbapt      || _MM_FROUND_TO_NEG_INF != 0x01
83302329Sbapt      || _MM_FROUND_TO_POS_INF != 0x02
84302329Sbapt      || _MM_FROUND_TO_ZERO != 0x03
85302329Sbapt      || _MM_FROUND_CUR_DIRECTION != 0x04
86302329Sbapt      || _MM_FROUND_RAISE_EXC != 0x00
87302329Sbapt      || _MM_FROUND_NO_EXC != 0x08
88302329Sbapt      || _MM_FROUND_NINT != 0x00
89302329Sbapt      || _MM_FROUND_FLOOR != 0x01
90302329Sbapt      || _MM_FROUND_CEIL != 0x02
91302329Sbapt      || _MM_FROUND_TRUNC != 0x03
92302329Sbapt      || _MM_FROUND_RINT != 0x04
93302329Sbapt      || _MM_FROUND_NEARBYINT != 0x0C)
94302329Sbapt    abort ();
95302329Sbapt}
96302329Sbapt