1/* { dg-do run } */ 2/* { dg-require-effective-target sse4 } */ 3/* { dg-options "-O2 -msse4.1" } */ 4 5#ifndef CHECK_H 6#define CHECK_H "sse4_1-check.h" 7#endif 8 9#ifndef TEST 10#define TEST sse4_1_test 11#endif 12 13#include CHECK_H 14 15#include <smmintrin.h> 16 17#include <string.h> 18 19#define lmskN 0x00 20#define lmsk0 0x01 21#define lmsk1 0x02 22#define lmsk2 0x04 23#define lmsk3 0x08 24#define lmsk01 0x03 25#define lmsk02 0x05 26#define lmsk03 0x09 27#define lmsk12 0x06 28#define lmsk13 0x0A 29#define lmsk23 0x0C 30#define lmskA 0x0F 31 32#define hmskN 0x00 33#define hmskA 0xF0 34#define hmsk0 0x10 35#define hmsk1 0x20 36#define hmsk2 0x40 37#define hmsk3 0x80 38#define hmsk01 0x30 39#define hmsk02 0x50 40#define hmsk03 0x90 41#define hmsk12 0x60 42#define hmsk13 0xA0 43#define hmsk23 0xC0 44 45#ifndef HIMASK 46#define HIMASK hmskA 47#endif 48 49#ifndef LOMASK 50#define LOMASK lmskA 51#endif 52 53static void 54TEST (void) 55{ 56 union 57 { 58 __m128 x; 59 float f[4]; 60 } val1[16], val2[16], res[16], chk[16]; 61 int i,j; 62 float tmp; 63 64 for (i = 0; i < 16; i++) 65 { 66 val1[i].f[0] = 2.; 67 val1[i].f[1] = 3.; 68 val1[i].f[2] = 4.; 69 val1[i].f[3] = 5.; 70 71 val2[i].f[0] = 10.; 72 val2[i].f[1] = 100.; 73 val2[i].f[2] = 1000.; 74 val2[i].f[3] = 10000.; 75 76 tmp = 0.; 77 for (j = 0; j < 4; j++) 78 if ((HIMASK & (0x10 << j))) 79 tmp += val1[i].f [j] * val2[i].f [j]; 80 81 for (j = 0; j < 4; j++) 82 if ((LOMASK & (1 << j))) 83 chk[i].f[j] = tmp; 84 } 85 86 for (i = 0; i < 16; i++) 87 { 88 res[i].x = _mm_dp_ps (val1[i].x, val2[i].x, HIMASK | LOMASK); 89 if (memcmp (&res[i], &chk[i], sizeof (chk[i]))) 90 abort (); 91 } 92} 93