1/* { dg-do run } */
2/* { dg-require-effective-target sse4 } */
3/* { dg-options "-O2 -msse4.1" } */
4
5#ifndef CHECK_H
6#define CHECK_H "sse4_1-check.h"
7#endif
8
9#ifndef TEST
10#define TEST sse4_1_test
11#endif
12
13#include CHECK_H
14
15#include <smmintrin.h>
16
17#include <string.h>
18
19#define lmskN  0x00
20#define lmsk0  0x01
21#define lmsk1  0x02
22#define lmsk2  0x04
23#define lmsk3  0x08
24#define lmsk01 0x03
25#define lmsk02 0x05
26#define lmsk03 0x09
27#define lmsk12 0x06
28#define lmsk13 0x0A
29#define lmsk23 0x0C
30#define lmskA  0x0F
31
32#define hmskN  0x00
33#define hmskA  0xF0
34#define hmsk0  0x10
35#define hmsk1  0x20
36#define hmsk2  0x40
37#define hmsk3  0x80
38#define hmsk01 0x30
39#define hmsk02 0x50
40#define hmsk03 0x90
41#define hmsk12 0x60
42#define hmsk13 0xA0
43#define hmsk23 0xC0
44
45#ifndef HIMASK
46#define HIMASK hmskA
47#endif
48
49#ifndef LOMASK
50#define LOMASK lmskA
51#endif
52
53static void
54TEST (void)
55{
56  union
57    {
58      __m128 x;
59      float f[4];
60    } val1[16], val2[16], res[16], chk[16];
61  int i,j;
62  float tmp;
63
64  for (i = 0; i < 16; i++)
65    {
66      val1[i].f[0] = 2.;
67      val1[i].f[1] = 3.;
68      val1[i].f[2] = 4.;
69      val1[i].f[3] = 5.;
70
71      val2[i].f[0] = 10.;
72      val2[i].f[1] = 100.;
73      val2[i].f[2] = 1000.;
74      val2[i].f[3] = 10000.;
75
76      tmp = 0.;
77      for (j = 0; j < 4; j++)
78        if ((HIMASK & (0x10 << j)))
79	  tmp += val1[i].f [j] * val2[i].f [j];
80
81      for (j = 0; j < 4; j++)
82	if ((LOMASK & (1 << j)))
83	  chk[i].f[j] = tmp;
84    }
85
86   for (i = 0; i < 16; i++)
87     {
88       res[i].x = _mm_dp_ps (val1[i].x, val2[i].x, HIMASK | LOMASK);
89       if (memcmp (&res[i], &chk[i], sizeof (chk[i])))
90	 abort ();
91     }
92}
93