1302329Sbapt#include <smmintrin.h> 2302329Sbapt#include <math.h> 3302329Sbapt 4302329Sbapt#define NUM 64 5302329Sbapt 6302329Sbaptstatic void 7302329Sbaptinit_round (FP_T *src) 8302329Sbapt{ 9302329Sbapt int i, sign = 1; 10302329Sbapt FP_T f = rand (); 11302329Sbapt 12302329Sbapt for (i = 0; i < NUM; i++) 13302329Sbapt { 14302329Sbapt src[i] = (i + 1)* f * M_PI * sign; 15302329Sbapt if (i < (NUM / 2)) 16302329Sbapt { 17302329Sbapt if ((i % 6) == 0) 18302329Sbapt f = f * src[i]; 19302329Sbapt } 20302329Sbapt else if (i == (NUM / 2)) 21302329Sbapt f = rand (); 22302329Sbapt else if ((i % 6) == 0) 23302329Sbapt f = 1 / (f * (i + 1) * src[i] * M_PI *sign); 24302329Sbapt sign = -sign; 25302329Sbapt } 26302329Sbapt} 27302329Sbapt 28302329Sbaptstatic FP_T 29302329Sbaptdo_round (FP_T f, int type) 30302329Sbapt{ 31302329Sbapt short saved_cw, new_cw, clr_mask; 32302329Sbapt FP_T ret; 33302329Sbapt 34302329Sbapt if ((type & 4)) 35302329Sbapt { 36302329Sbapt type = 0; 37302329Sbapt clr_mask = 0xFFFF; 38302329Sbapt } 39302329Sbapt else 40302329Sbapt { 41302329Sbapt type = 0x003F | ((type & 3) << 10); 42302329Sbapt clr_mask = ~0x0C3F; 43302329Sbapt } 44302329Sbapt 45302329Sbapt __asm__ ("fld" ASM_SUFFIX " %0" : : "m" (*&f)); 46302329Sbapt 47302329Sbapt __asm__ ("fstcw %0" : "=m" (*&saved_cw)); 48302329Sbapt new_cw = saved_cw & clr_mask; 49302329Sbapt new_cw |= type; 50302329Sbapt __asm__ ("fldcw %0" : : "m" (*&new_cw)); 51302329Sbapt 52302329Sbapt __asm__ ("frndint\n" 53302329Sbapt "fstp" ASM_SUFFIX " %0\n" : "=m" (*&ret)); 54302329Sbapt __asm__ ("fldcw %0" : : "m" (*&saved_cw)); 55302329Sbapt return ret; 56302329Sbapt} 57302329Sbapt 58302329Sbaptstatic void 59302329Sbaptsse4_1_test (void) 60302329Sbapt{ 61302329Sbapt int i; 62302329Sbapt FP_T f; 63302329Sbapt union 64302329Sbapt { 65302329Sbapt VEC_T x[NUM / LOOP_INCREMENT]; 66302329Sbapt FP_T f[NUM]; 67302329Sbapt } dst, src; 68302329Sbapt 69302329Sbapt init_round (src.f); 70302329Sbapt 71302329Sbapt for (i = 0; i < NUM / LOOP_INCREMENT; i++) 72302329Sbapt dst.x[i] = ROUND_INTRIN (src.x[i], ROUND_MODE); 73302329Sbapt 74302329Sbapt for (i = 0; i < NUM; i += CHECK_LOOP_INCREMENT) 75302329Sbapt { 76302329Sbapt f = do_round (src.f[i], CHECK_ROUND_MODE); 77302329Sbapt if (f != dst.f[i]) 78302329Sbapt abort (); 79302329Sbapt } 80302329Sbapt 81302329Sbapt if (_MM_FROUND_TO_NEAREST_INT != 0x00 82302329Sbapt || _MM_FROUND_TO_NEG_INF != 0x01 83302329Sbapt || _MM_FROUND_TO_POS_INF != 0x02 84302329Sbapt || _MM_FROUND_TO_ZERO != 0x03 85302329Sbapt || _MM_FROUND_CUR_DIRECTION != 0x04 86302329Sbapt || _MM_FROUND_RAISE_EXC != 0x00 87302329Sbapt || _MM_FROUND_NO_EXC != 0x08 88302329Sbapt || _MM_FROUND_NINT != 0x00 89302329Sbapt || _MM_FROUND_FLOOR != 0x01 90302329Sbapt || _MM_FROUND_CEIL != 0x02 91302329Sbapt || _MM_FROUND_TRUNC != 0x03 92302329Sbapt || _MM_FROUND_RINT != 0x04 93302329Sbapt || _MM_FROUND_NEARBYINT != 0x0C) 94302329Sbapt abort (); 95302329Sbapt} 96302329Sbapt