1262182Semaste/* { dg-do run } */
2262182Semaste/* { dg-require-effective-target xop } */
3262182Semaste/* { dg-options "-O2 -mxop" } */
4262182Semaste
5262182Semaste#include "xop-check.h"
6262182Semaste
7262182Semaste#include <x86intrin.h>
8262182Semaste#include <string.h>
9262182Semaste
10262182Semaste#define NUM 10
11262182Semaste
12262182Semasteunion
13262182Semaste{
14262182Semaste  __m128i x[NUM];
15262182Semaste  unsigned char  ssi[NUM * 16];
16262182Semaste  unsigned short si[NUM * 8];
17262182Semaste  unsigned int li[NUM * 4];
18262182Semaste  unsigned long long  lli[NUM * 2];
19262182Semaste} dst, res, src1;
20262182Semaste
21262182Semastestatic void
22262182Semasteinit_byte ()
23262182Semaste{
24262182Semaste  int i;
25262182Semaste  for (i=0; i < NUM * 16; i++)
26262182Semaste    src1.ssi[i] = i;
27262182Semaste}
28262182Semaste
29262182Semastestatic void
30262182Semasteinit_word ()
31262182Semaste{
32262182Semaste  int i;
33262182Semaste  for (i=0; i < NUM * 8; i++)
34262182Semaste    src1.si[i] = i;
35262182Semaste}
36262182Semaste
37262182Semaste
38262182Semastestatic void
39262182Semasteinit_dword ()
40262182Semaste{
41262182Semaste  int i;
42262182Semaste  for (i=0; i < NUM * 4; i++)
43262182Semaste    src1.li[i] = i;
44262182Semaste}
45262182Semaste
46262182Semastestatic int
47262182Semastecheck_byte2word ()
48262182Semaste{
49262182Semaste  int i, j, s, t, check_fails = 0;
50262182Semaste  for (i = 0; i < NUM * 16; i = i + 16)
51262182Semaste    {
52262182Semaste      for (j = 0; j < 8; j++)
53262182Semaste	{
54262182Semaste	  t = i + (2 * j);
55262182Semaste	  s = (i / 2) + j;
56262182Semaste	  res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ;
57262182Semaste	  if (res.si[s] != dst.si[s])
58262182Semaste	    check_fails++;
59262182Semaste	}
60262182Semaste    }
61262182Semaste}
62262182Semaste
63262182Semastestatic int
64262182Semastecheck_byte2dword ()
65262182Semaste{
66262182Semaste  int i, j, s, t, check_fails = 0;
67262182Semaste  for (i = 0; i < NUM * 16; i = i + 16)
68262182Semaste    {
69262182Semaste      for (j = 0; j < 4; j++)
70262182Semaste	{
71262182Semaste	  t = i + (4 * j);
72262182Semaste	  s = (i / 4) + j;
73262182Semaste	  res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
74262182Semaste	              + src1.ssi[t + 3]);
75262182Semaste	  if (res.li[s] != dst.li[s])
76262182Semaste	    check_fails++;
77262182Semaste	}
78262182Semaste    }
79262182Semaste  return check_fails++;
80262182Semaste}
81262182Semaste
82262182Semastestatic int
83262182Semastecheck_byte2qword ()
84262182Semaste{
85262182Semaste  int i, j, s, t, check_fails = 0;
86262182Semaste  for (i = 0; i < NUM * 16; i = i + 16)
87262182Semaste    {
88262182Semaste      for (j = 0; j < 2; j++)
89262182Semaste	{
90262182Semaste	  t = i + (8 * j);
91262182Semaste	  s = (i / 8) + j;
92262182Semaste	  res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
93262182Semaste		       + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5])
94262182Semaste	               + (src1.ssi[t + 6] + src1.ssi[t + 7]));
95262182Semaste	  if (res.lli[s] != dst.lli[s])
96262182Semaste	    check_fails++;
97262182Semaste	}
98262182Semaste    }
99262182Semaste  return check_fails++;
100262182Semaste}
101262182Semaste
102262182Semastestatic int
103262182Semastecheck_word2dword ()
104262182Semaste{
105262182Semaste  int i, j, s, t, check_fails = 0;
106262182Semaste  for (i = 0; i < (NUM * 8); i = i + 8)
107262182Semaste    {
108262182Semaste      for (j = 0; j < 4; j++)
109262182Semaste	{
110262182Semaste	  t = i + (2 * j);
111262182Semaste	  s = (i / 2) + j;
112262182Semaste	  res.li[s] = src1.si[t] + src1.si[t + 1] ;
113262182Semaste	  if (res.li[s] != dst.li[s])
114262182Semaste	    check_fails++;
115262182Semaste	}
116262182Semaste    }
117262182Semaste}
118262182Semaste
119262182Semastestatic int
120262182Semastecheck_word2qword ()
121262182Semaste{
122262182Semaste  int i, j, s, t, check_fails = 0;
123262182Semaste  for (i = 0; i < NUM * 8; i = i + 8)
124262182Semaste    {
125262182Semaste      for (j = 0; j < 2; j++)
126262182Semaste	{
127262182Semaste	  t = i + (4 * j);
128262182Semaste	  s = (i / 4) + j;
129262182Semaste	  res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2]
130262182Semaste	               + src1.si[t + 3]);
131262182Semaste	  if (res.lli[s] != dst.lli[s])
132262182Semaste	    check_fails++;
133262182Semaste	}
134262182Semaste    }
135262182Semaste  return check_fails++;
136262182Semaste}
137262182Semaste
138262182Semastestatic int
139262182Semastecheck_dword2qword ()
140262182Semaste{
141262182Semaste  int i, j, s, t, check_fails = 0;
142262182Semaste  for (i = 0; i < (NUM * 4); i = i + 4)
143262182Semaste    {
144262182Semaste      for (j = 0; j < 2; j++)
145262182Semaste	{
146262182Semaste	  t = i + (2 * j);
147262182Semaste	  s = (i / 2) + j;
148262182Semaste	  res.lli[s] = src1.li[t] + src1.li[t + 1] ;
149262182Semaste	  if (res.lli[s] != dst.lli[s])
150262182Semaste	    check_fails++;
151262182Semaste	}
152262182Semaste    }
153262182Semaste}
154262182Semaste
155262182Semastestatic void
156262182Semastexop_test (void)
157262182Semaste{
158262182Semaste  int i;
159262182Semaste
160262182Semaste  /* Check haddubw */
161262182Semaste  init_byte ();
162262182Semaste
163262182Semaste  for (i = 0; i < NUM; i++)
164262182Semaste    dst.x[i] = _mm_haddw_epu8 (src1.x[i]);
165262182Semaste
166262182Semaste  if (check_byte2word())
167262182Semaste  abort ();
168262182Semaste
169262182Semaste  /* Check haddubd */
170262182Semaste  for (i = 0; i < (NUM ); i++)
171262182Semaste    dst.x[i] = _mm_haddd_epu8 (src1.x[i]);
172262182Semaste
173262182Semaste  if (check_byte2dword())
174262182Semaste    abort ();
175262182Semaste
176262182Semaste  /* Check haddubq */
177262182Semaste  for (i = 0; i < NUM; i++)
178262182Semaste    dst.x[i] = _mm_haddq_epu8 (src1.x[i]);
179262182Semaste
180262182Semaste  if (check_byte2qword())
181262182Semaste    abort ();
182262182Semaste
183262182Semaste  /* Check hadduwd */
184262182Semaste  init_word ();
185262182Semaste
186262182Semaste  for (i = 0; i < (NUM ); i++)
187262182Semaste    dst.x[i] = _mm_haddd_epu16 (src1.x[i]);
188262182Semaste
189262182Semaste  if (check_word2dword())
190262182Semaste    abort ();
191262182Semaste
192262182Semaste  /* Check haddbuwq */
193262182Semaste
194262182Semaste  for (i = 0; i < NUM; i++)
195262182Semaste    dst.x[i] = _mm_haddq_epu16 (src1.x[i]);
196262182Semaste
197262182Semaste  if (check_word2qword())
198262182Semaste    abort ();
199262182Semaste
200262182Semaste  /* Check hadudq */
201262182Semaste  init_dword ();
202262182Semaste    for (i = 0; i < NUM; i++)
203262182Semaste    dst.x[i] = _mm_haddq_epu32 (src1.x[i]);
204262182Semaste
205262182Semaste  if (check_dword2qword())
206262182Semaste    abort ();
207262182Semaste}
208262182Semaste