1/* { dg-do run } */ 2/* { dg-require-effective-target sse4 } */ 3/* { dg-options "-O2 -msse4.1" } */ 4 5#ifndef CHECK_H 6#define CHECK_H "sse4_1-check.h" 7#endif 8 9#ifndef TEST 10#define TEST sse4_1_test 11#endif 12 13#include CHECK_H 14 15#include <smmintrin.h> 16 17#define lmskN 0x00 18#define lmsk0 0x01 19#define lmsk1 0x02 20#define lmsk2 0x04 21#define lmsk3 0x08 22#define lmsk01 0x03 23#define lmsk02 0x05 24#define lmsk03 0x09 25#define lmsk12 0x06 26#define lmsk13 0x0A 27#define lmsk23 0x0C 28#define lmskA 0x0F 29 30#define hmskN 0x00 31#define hmskA 0xF0 32#define hmsk0 0x10 33#define hmsk1 0x20 34#define hmsk2 0x40 35#define hmsk3 0x80 36#define hmsk01 0x30 37#define hmsk02 0x50 38#define hmsk03 0x90 39#define hmsk12 0x60 40#define hmsk13 0xA0 41#define hmsk23 0xC0 42 43#ifndef HIMASK 44#define HIMASK hmskA 45#endif 46 47static void 48TEST (void) 49{ 50 union 51 { 52 __m128 x; 53 float f[4]; 54 } val1, val2, res[16]; 55 int masks[16]; 56 int i, j; 57 58 val1.f[0] = 2.; 59 val1.f[1] = 3.; 60 val1.f[2] = 4.; 61 val1.f[3] = 5.; 62 63 val2.f[0] = 10.; 64 val2.f[1] = 100.; 65 val2.f[2] = 1000.; 66 val2.f[3] = 10000.; 67 68 res[0].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk0); 69 res[1].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk1); 70 res[2].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk2); 71 res[3].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk3); 72 res[4].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk01); 73 res[5].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk02); 74 res[6].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk03); 75 res[7].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk12); 76 res[8].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk13); 77 res[9].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmsk23); 78 res[10].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk0)); 79 res[11].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk1)); 80 res[12].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk2)); 81 res[13].x = _mm_dp_ps (val1.x, val2.x, HIMASK | (0x0F & ~lmsk3)); 82 res[14].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskN); 83 res[15].x = _mm_dp_ps (val1.x, val2.x, HIMASK | lmskA); 84 85 masks[0] = HIMASK | lmsk0; 86 masks[1] = HIMASK | lmsk1; 87 masks[2] = HIMASK | lmsk2; 88 masks[3] = HIMASK | lmsk3; 89 masks[4] = HIMASK | lmsk01; 90 masks[5] = HIMASK | lmsk02; 91 masks[6] = HIMASK | lmsk03; 92 masks[7] = HIMASK | lmsk12; 93 masks[8] = HIMASK | lmsk13; 94 masks[9] = HIMASK | lmsk23; 95 masks[10] = HIMASK | (0x0F & ~lmsk0); 96 masks[11] = HIMASK | (0x0F & ~lmsk1); 97 masks[12] = HIMASK | (0x0F & ~lmsk2); 98 masks[13] = HIMASK | (0x0F & ~lmsk3); 99 masks[14] = HIMASK | lmskN; 100 masks[15] = HIMASK | lmskA; 101 102 for (i = 0; i <= 15; i++) 103 { 104 float tmp = 0.; 105 106 for (j = 0; j < 4; j++) 107 if ((HIMASK & (0x10 << j))) 108 tmp += val1.f[j] * val2.f[j]; 109 110 for (j = 0; j < 4; j++) 111 if ((masks[i] & (1 << j)) && res[i].f[j] != tmp) 112 abort (); 113 } 114} 115