1/* { dg-do run } */
2/* { dg-require-effective-target ssse3 } */
3/* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */
4
5#ifndef CHECK_H
6#define CHECK_H "ssse3-check.h"
7#endif
8
9#ifndef TEST
10#define TEST ssse3_test
11#endif
12
13#include CHECK_H
14
15#include "ssse3-vals.h"
16
17#include <tmmintrin.h>
18#include <string.h>
19
20#ifndef __AVX__
/* Test the 64-bit form.  The shift count of _mm_alignr_pi8 must be a
   compile-time constant, so expand the intrinsic once per possible
   value of IMM.  */
static void
ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
{
  __m64 t1 = *(__m64 *) i1;
  __m64 t2 = *(__m64 *) i2;

  /* One case label per immediate; counts of 16 or more all produce the
     same (all-zero) result — see compute_correct_result_64 — so they
     share the default case.  */
#define PALIGNR64_CASE(N) \
  case N: \
    *(__m64 *) r = _mm_alignr_pi8 (t1, t2, N); \
    break;

  switch (imm)
    {
      PALIGNR64_CASE (0)
      PALIGNR64_CASE (1)
      PALIGNR64_CASE (2)
      PALIGNR64_CASE (3)
      PALIGNR64_CASE (4)
      PALIGNR64_CASE (5)
      PALIGNR64_CASE (6)
      PALIGNR64_CASE (7)
      PALIGNR64_CASE (8)
      PALIGNR64_CASE (9)
      PALIGNR64_CASE (10)
      PALIGNR64_CASE (11)
      PALIGNR64_CASE (12)
      PALIGNR64_CASE (13)
      PALIGNR64_CASE (14)
      PALIGNR64_CASE (15)
    default:
      *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16);
      break;
    }
#undef PALIGNR64_CASE

  /* Leave MMX state clean before returning to FP code.  */
  _mm_empty ();
}
85#endif
86
/* Test the 128-bit form.  The shift count of _mm_alignr_epi8 must be a
   compile-time constant, so expand the intrinsic once per possible
   value of IMM.  */
static void
ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r)
{
  /* Assumes incoming pointers are 16-byte aligned */
  __m128i t1 = *(__m128i *) i1;
  __m128i t2 = *(__m128i *) i2;

  /* One case label per immediate; counts of 32 or more all produce the
     same (all-zero) result — see compute_correct_result_128 — so they
     share the default case.  */
#define PALIGNR128_CASE(N) \
  case N: \
    *(__m128i *) r = _mm_alignr_epi8 (t1, t2, N); \
    break;

  switch (imm)
    {
      PALIGNR128_CASE (0)
      PALIGNR128_CASE (1)
      PALIGNR128_CASE (2)
      PALIGNR128_CASE (3)
      PALIGNR128_CASE (4)
      PALIGNR128_CASE (5)
      PALIGNR128_CASE (6)
      PALIGNR128_CASE (7)
      PALIGNR128_CASE (8)
      PALIGNR128_CASE (9)
      PALIGNR128_CASE (10)
      PALIGNR128_CASE (11)
      PALIGNR128_CASE (12)
      PALIGNR128_CASE (13)
      PALIGNR128_CASE (14)
      PALIGNR128_CASE (15)
      PALIGNR128_CASE (16)
      PALIGNR128_CASE (17)
      PALIGNR128_CASE (18)
      PALIGNR128_CASE (19)
      PALIGNR128_CASE (20)
      PALIGNR128_CASE (21)
      PALIGNR128_CASE (22)
      PALIGNR128_CASE (23)
      PALIGNR128_CASE (24)
      PALIGNR128_CASE (25)
      PALIGNR128_CASE (26)
      PALIGNR128_CASE (27)
      PALIGNR128_CASE (28)
      PALIGNR128_CASE (29)
      PALIGNR128_CASE (30)
      PALIGNR128_CASE (31)
    default:
      *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32);
      break;
    }
#undef PALIGNR128_CASE
}
198
/* Reference implementation of the 128-bit PALIGNR: concatenate the two
   16-byte operands and write the 16 bytes starting at offset IMM into R,
   zero-filling once the offset runs past the concatenation.  */
static void
compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r)
{
  unsigned char concat[32];
  unsigned char *out = (unsigned char *) r;
  int pos;

  /* Low half of the concatenation comes from i2, high half from i1.  */
  memcpy (concat, i2, 16);
  memcpy (concat + 16, i1, 16);

  /* The explicit imm >= 32 test also protects the index against
     unsigned wrap-around of imm + pos for very large IMM values.  */
  for (pos = 0; pos < 16; pos++)
    out[pos] = (imm >= 32 || imm + pos >= 32) ? 0 : concat[imm + pos];
}
216
217#ifndef __AVX__
/* Reference implementation for the 64-bit form.  TEST runs the MMX
   intrinsic twice to build a 128-bit result, so this computes two
   independent 8-byte PALIGNR operations, one per loop iteration.  */
static void
compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r)
{
  unsigned char *out = (unsigned char *) r;
  int half, pos;

  for (half = 0; half < 2; half++)
    {
      unsigned char concat[16];

      /* Concatenate the matching 8-byte pieces: low from i2, high
	 from i1.  */
      memcpy (concat, i2 + 2 * half, 8);
      memcpy (concat + 8, i1 + 2 * half, 8);

      /* The explicit imm >= 16 test also protects the index against
	 unsigned wrap-around of imm + pos for very large IMM values.  */
      for (pos = 0; pos < 8; pos++)
	out[8 * half + pos]
	  = (imm >= 16 || imm + pos >= 16) ? 0 : concat[imm + pos];
    }
}
245#endif
246
247static void
248TEST (void)
249{
250  int i;
251  int r [4] __attribute__ ((aligned(16)));
252  int ck [4];
253  unsigned int imm;
254  int fail = 0;
255
256  for (i = 0; i < 256; i += 8)
257    for (imm = 0; imm < 100; imm++)
258      {
259#ifndef __AVX__
260	/* Manually compute the result */
261	compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck);
262
263	/* Run the 64-bit tests */
264	ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]);
265	ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]);
266	fail += chk_128 (ck, r);
267#endif
268
269	/* Recompute the results for 128-bits */
270	compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck);
271
272	/* Run the 128-bit tests */
273	ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r);
274	fail += chk_128 (ck, r);
275      }
276
277  if (fail != 0)
278    abort ();
279}
280