1/* { dg-do run } */
2/* { dg-require-effective-target xop } */
3/* { dg-options "-O2 -mxop" } */
4
5#include "xop-check.h"
6
7#include <x86intrin.h>
8#include <string.h>
9
10#define NUM 10
11
12union
13{
14  __m128i x[NUM];
15  signed char ssi[NUM * 16];
16  short si[NUM * 8];
17  int li[NUM * 4];
18  long long lli[NUM * 2];
19} dst, res, src1;
20
21static void
22init_sbyte ()
23{
24  int i;
25  for (i=0; i < NUM * 16; i++)
26    src1.ssi[i] = i;
27}
28
29static void
30init_sword ()
31{
32  int i;
33  for (i=0; i < NUM * 8; i++)
34    src1.si[i] = i;
35}
36
37
38static void
39init_sdword ()
40{
41  int i;
42  for (i=0; i < NUM * 4; i++)
43    src1.li[i] = i;
44}
45
46static int
47check_sbyte2word ()
48{
49  int i, j, s, t, check_fails = 0;
50  for (i = 0; i < NUM * 16; i = i + 16)
51    {
52      for (j = 0; j < 8; j++)
53	{
54	  t = i + (2 * j);
55	  s = (i / 2) + j;
56	  res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ;
57	  if (res.si[s] != dst.si[s])
58	    check_fails++;
59	}
60    }
61}
62
63static int
64check_sbyte2dword ()
65{
66  int i, j, s, t, check_fails = 0;
67  for (i = 0; i < NUM * 16; i = i + 16)
68    {
69      for (j = 0; j < 4; j++)
70	{
71	  t = i + (4 * j);
72	  s = (i / 4) + j;
73	  res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
74	              + src1.ssi[t + 3]);
75	  if (res.li[s] != dst.li[s])
76	    check_fails++;
77	}
78    }
79  return check_fails++;
80}
81
82static int
83check_sbyte2qword ()
84{
85  int i, j, s, t, check_fails = 0;
86  for (i = 0; i < NUM * 16; i = i + 16)
87    {
88      for (j = 0; j < 2; j++)
89	{
90	  t = i + (8 * j);
91	  s = (i / 8) + j;
92	  res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
93		       + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5])
94	               + (src1.ssi[t + 6] + src1.ssi[t + 7]));
95	  if (res.lli[s] != dst.lli[s])
96	    check_fails++;
97	}
98    }
99  return check_fails++;
100}
101
102static int
103check_sword2dword ()
104{
105  int i, j, s, t, check_fails = 0;
106  for (i = 0; i < (NUM * 8); i = i + 8)
107    {
108      for (j = 0; j < 4; j++)
109	{
110	  t = i + (2 * j);
111	  s = (i / 2) + j;
112	  res.li[s] = src1.si[t] + src1.si[t + 1] ;
113	  if (res.li[s] != dst.li[s])
114	    check_fails++;
115	}
116    }
117}
118
119static int
120check_sword2qword ()
121{
122  int i, j, s, t, check_fails = 0;
123  for (i = 0; i < NUM * 8; i = i + 8)
124    {
125      for (j = 0; j < 2; j++)
126	{
127	  t = i + (4 * j);
128	  s = (i / 4) + j;
129	  res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2]
130	               + src1.si[t + 3]);
131	  if (res.lli[s] != dst.lli[s])
132	    check_fails++;
133	}
134    }
135  return check_fails++;
136}
137
138static int
139check_dword2qword ()
140{
141  int i, j, s, t, check_fails = 0;
142  for (i = 0; i < (NUM * 4); i = i + 4)
143    {
144      for (j = 0; j < 2; j++)
145	{
146	  t = i + (2 * j);
147	  s = (i / 2) + j;
148	  res.lli[s] = src1.li[t] + src1.li[t + 1] ;
149	  if (res.lli[s] != dst.lli[s])
150	    check_fails++;
151	}
152    }
153}
154
155static void
156xop_test (void)
157{
158  int i;
159
160  init_sbyte ();
161
162  for (i = 0; i < NUM; i++)
163    dst.x[i] = _mm_haddw_epi8 (src1.x[i]);
164
165  if (check_sbyte2word())
166  abort ();
167
168
169  for (i = 0; i < (NUM ); i++)
170    dst.x[i] = _mm_haddd_epi8 (src1.x[i]);
171
172  if (check_sbyte2dword())
173    abort ();
174
175
176  for (i = 0; i < NUM; i++)
177    dst.x[i] = _mm_haddq_epi8 (src1.x[i]);
178
179  if (check_sbyte2qword())
180    abort ();
181
182
183  init_sword ();
184
185  for (i = 0; i < (NUM ); i++)
186    dst.x[i] = _mm_haddd_epi16 (src1.x[i]);
187
188  if (check_sword2dword())
189    abort ();
190
191  for (i = 0; i < NUM; i++)
192    dst.x[i] = _mm_haddq_epi16 (src1.x[i]);
193
194  if (check_sword2qword())
195    abort ();
196
197
198  init_sdword ();
199
200    for (i = 0; i < NUM; i++)
201    dst.x[i] = _mm_haddq_epi32 (src1.x[i]);
202
203  if (check_dword2qword())
204    abort ();
205
206}
207