/* { dg-do run } */
/* { dg-options "-O2 -msse2" } */
/* { dg-require-effective-target sse2 } */
4
5#include "sse2-check.h"
6
7#include <emmintrin.h>
8#include <string.h>
9
/* Shift distance shared by every shift test below: a bit count for the
   per-lane shifts and a byte count for the whole-register
   _mm_srli_si128 / _mm_slli_si128 shifts.  */
#define SHIFT (4)
11
/* One 128-bit SSE register viewed as lanes of several widths.  All
   members overlay the same 16 bytes, so a value stored through one
   member can be read back through any other.  */
typedef union {
  __m128i v;                  /* the whole register, as used by the intrinsics */
  unsigned int s[4];          /* four 32-bit lanes */
  unsigned short int t[8];    /* eight 16-bit lanes */
  unsigned long long u[2];    /* two 64-bit lanes */
  unsigned char c[16];        /* sixteen individual bytes */
} vecInLong;
19
20void sse2_tests (void) __attribute__((noinline));
21void dump128_16 (char *, char *, vecInLong);
22void dump128_32 (char *, char *, vecInLong);
23void dump128_64 (char *, char *, vecInLong);
24void dump128_128 (char *, char *, vecInLong);
25int check (const char *, const char *[]);
26
/* Text produced by the dump128_* helpers; each call appends one line.  */
char buf[8000];
/* Annotated copy of `buf' written by check (), printed only when DEBUG
   is defined.  */
char comparison[8000];
/* Number of lines that differed from the reference; the test aborts
   when this is non-zero.  */
static int errors = 0;
30
31vecInLong a128, b128, c128, d128, e128, f128;
32__m128i m128_16, m128_32, s128, m128_64, m128_128;
33__m64 m64_16, s64, m64_32, m64_64;
34
/* Expected output, one entry per dump128_* call in sse2_tests () and in
   the same order.  check () compares the generated text against these
   strings character by character, so the labels must stay byte-for-byte
   identical to the ones passed in sse2_tests () -- including the
   spellings "_mm_sll_si128", "_mm_shuffelo_epi16" and
   "_mm_shuffehi_epi16", and the double space in the "_mm_srli_si128"
   line (its label ends in a space and the dump helper adds another).
   The empty string terminates the list.  */
const char *reference_sse2[] = {
  /* Arithmetic right shifts, 16- and 32-bit lanes.  */
  "_mm_srai_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_sra_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_srai_epi32 00123456 00123456 00123456 00123456 \n",
  "_mm_sra_epi32 00123456 00123456 00123456 00123456 \n",

  /* Logical right shifts, 16-, 32- and 64-bit lanes.  */
  "_mm_srli_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_srl_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_srli_epi32 00123456 00123456 00123456 00123456 \n",
  "_mm_srl_epi32 00123456 00123456 00123456 00123456 \n",
  "_mm_srli_epi64 00123456789abcde 00123456789abcde \n",
  "_mm_srl_epi64 00123456789abcde 00123456789abcde \n",

  /* Whole-register right shift by SHIFT bytes.  */
  "_mm_srli_si128 (byte shift)  00000000ffeeddccbbaa998877665544\n",

  /* Left shifts, 16-, 32- and 64-bit lanes.  */
  "_mm_slli_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",
  "_mm_sll_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",
  "_mm_slli_epi32 12345670 12345670 12345670 12345670 \n",
  "_mm_sll_epi32 12345670 12345670 12345670 12345670 \n",
  "_mm_slli_epi64 123456789abcdef0 123456789abcdef0 \n",
  "_mm_sll_epi64 123456789abcdef0 123456789abcdef0 \n",

  /* Whole-register left shift by SHIFT bytes.  */
  "_mm_sll_si128 (byte shift) bbaa9988776655443322110000000000\n",

  /* Shuffles with selector 0x1b (reverse four elements).  */
  "_mm_shuffle_epi32 ffeeddcc bbaa9988 77665544 33221100 \n",
  "_mm_shuffelo_epi16 7766 5544 3322 1100 9988 bbaa ddcc ffee \n",
  "_mm_shuffehi_epi16 1100 3322 5544 7766 ffee ddcc bbaa 9988 \n",

  /* End-of-list marker.  */
  ""
};
59
60static void
61sse2_test (void)
62{
63  a128.s[0] = 0x01234567;
64  a128.s[1] = 0x01234567;
65  a128.s[2] = 0x01234567;
66  a128.s[3] = 0x01234567;
67
68  m128_32 = a128.v;
69
70  d128.u[0] = 0x0123456789abcdefULL;
71  d128.u[1] = 0x0123456789abcdefULL;
72
73  m128_64 = d128.v;
74
75  /* This is the 128-bit constant 0x00112233445566778899aabbccddeeff,
76     expressed as two little-endian 64-bit words.  */
77  e128.u[0] = 0x7766554433221100ULL;
78  e128.u[1] = 0xffeeddccbbaa9988ULL;
79
80  f128.t[0] = 0x0123;
81  f128.t[1] = 0x0123;
82  f128.t[2] = 0x0123;
83  f128.t[3] = 0x0123;
84  f128.t[4] = 0x0123;
85  f128.t[5] = 0x0123;
86  f128.t[6] = 0x0123;
87  f128.t[7] = 0x0123;
88
89  m128_16 = f128.v;
90
91  m128_128 = e128.v;
92
93  b128.s[0] = SHIFT;
94  b128.s[1] = 0;
95  b128.s[2] = 0;
96  b128.s[3] = 0;
97
98  s128 = b128.v;
99
100  sse2_tests();
101  check (buf, reference_sse2);
102#ifdef DEBUG
103  printf ("sse2 testing:\n");
104  printf (buf);
105  printf ("\ncomparison:\n");
106  printf (comparison);
107#endif
108  buf[0] = '\0';
109
110  if (errors != 0)
111    abort ();
112}
113
114void __attribute__((noinline))
115sse2_tests (void)
116{
117  /* psraw */
118  c128.v = _mm_srai_epi16 (m128_16, SHIFT);
119  dump128_16 (buf, "_mm_srai_epi16", c128);
120  c128.v = _mm_sra_epi16 (m128_16, s128);
121  dump128_16 (buf, "_mm_sra_epi16", c128);
122
123  /* psrad */
124  c128.v = _mm_srai_epi32 (m128_32, SHIFT);
125  dump128_32 (buf, "_mm_srai_epi32", c128);
126  c128.v = _mm_sra_epi32 (m128_32, s128);
127  dump128_32 (buf, "_mm_sra_epi32", c128);
128
129  /* psrlw */
130  c128.v = _mm_srli_epi16 (m128_16, SHIFT);
131  dump128_16 (buf, "_mm_srli_epi16", c128);
132  c128.v = _mm_srl_epi16 (m128_16, s128);
133  dump128_16 (buf, "_mm_srl_epi16", c128);
134
135  /* psrld */
136  c128.v = _mm_srli_epi32 (m128_32, SHIFT);
137  dump128_32 (buf, "_mm_srli_epi32", c128);
138  c128.v = _mm_srl_epi32 (m128_32, s128);
139  dump128_32 (buf, "_mm_srl_epi32", c128);
140
141  /* psrlq */
142  c128.v = _mm_srli_epi64 (m128_64, SHIFT);
143  dump128_64 (buf, "_mm_srli_epi64", c128);
144  c128.v = _mm_srl_epi64 (m128_64, s128);
145  dump128_64 (buf, "_mm_srl_epi64", c128);
146
147  /* psrldq */
148  c128.v = _mm_srli_si128 (m128_128, SHIFT);
149  dump128_128 (buf, "_mm_srli_si128 (byte shift) ", c128);
150
151  /* psllw */
152  c128.v = _mm_slli_epi16 (m128_16, SHIFT);
153  dump128_16 (buf, "_mm_slli_epi16", c128);
154  c128.v = _mm_sll_epi16 (m128_16, s128);
155  dump128_16 (buf, "_mm_sll_epi16", c128);
156
157  /* pslld */
158  c128.v = _mm_slli_epi32 (m128_32, SHIFT);
159  dump128_32 (buf, "_mm_slli_epi32", c128);
160  c128.v = _mm_sll_epi32 (m128_32, s128);
161  dump128_32 (buf, "_mm_sll_epi32", c128);
162
163  /* psllq */
164  c128.v = _mm_slli_epi64 (m128_64, SHIFT);
165  dump128_64 (buf, "_mm_slli_epi64", c128);
166  c128.v = _mm_sll_epi64 (m128_64, s128);
167  dump128_64 (buf, "_mm_sll_epi64", c128);
168
169  /* pslldq */
170  c128.v = _mm_slli_si128 (m128_128, SHIFT);
171  dump128_128 (buf, "_mm_sll_si128 (byte shift)", c128);
172
173  /* Shuffle constant 0x1b == 0b_00_01_10_11, e.g. swap words: ABCD => DCBA.  */
174
175  /* pshufd */
176  c128.v = _mm_shuffle_epi32 (m128_128, 0x1b);
177  dump128_32 (buf, "_mm_shuffle_epi32", c128);
178
179  /* pshuflw */
180  c128.v = _mm_shufflelo_epi16 (m128_128, 0x1b);
181  dump128_16 (buf, "_mm_shuffelo_epi16", c128);
182
183  /* pshufhw */
184  c128.v = _mm_shufflehi_epi16 (m128_128, 0x1b);
185  dump128_16 (buf, "_mm_shuffehi_epi16", c128);
186}
187
188void
189dump128_16 (char *buf, char *name, vecInLong x)
190{
191  int i;
192  char *p = buf + strlen (buf);
193
194  sprintf (p, "%s ", name);
195  p += strlen (p);
196
197  for (i=0; i<8; i++)
198    {
199      sprintf (p, "%4.4x ", x.t[i]);
200      p += strlen (p);
201    }
202  strcat (p, "\n");
203}
204
205void
206dump128_32 (char *buf, char *name, vecInLong x)
207{
208  int i;
209  char *p = buf + strlen (buf);
210
211  sprintf (p, "%s ", name);
212  p += strlen (p);
213
214  for (i=0; i<4; i++)
215    {
216      sprintf (p, "%8.8x ", x.s[i]);
217      p += strlen (p);
218    }
219  strcat (p, "\n");
220}
221
222void
223dump128_64 (char *buf, char *name, vecInLong x)
224{
225  int i;
226  char *p = buf + strlen (buf);
227
228  sprintf (p, "%s ", name);
229  p += strlen (p);
230
231  for (i=0; i<2; i++)
232    {
233#if defined(_WIN32) && !defined(__CYGWIN__)
234      sprintf (p, "%16.16I64x ", x.u[i]);
235#else
236      sprintf (p, "%16.16llx ", x.u[i]);
237#endif
238      p += strlen (p);
239    }
240  strcat (p, "\n");
241}
242
243void
244dump128_128 (char *buf, char *name, vecInLong x)
245{
246  int i;
247  char *p = buf + strlen (buf);
248
249  sprintf (p, "%s ", name);
250  p += strlen (p);
251
252  for (i=15; i>=0; i--)
253    {
254      /* This is cheating; we don't have a 128-bit int format code.
255	 Running the loop backwards to compensate for the
256	 little-endian layout. */
257      sprintf (p, "%2.2x", x.c[i]);
258      p += strlen (p);
259    }
260  strcat (p, "\n");
261}
262
263int
264check (const char *input, const char *reference[])
265{
266  int broken, i, j, len;
267  const char *p_input;
268  char *p_comparison;
269  int new_errors = 0;
270
271  p_comparison = &comparison[0];
272  p_input = input;
273
274  for (i = 0; *reference[i] != '\0'; i++)
275    {
276      broken = 0;
277      len = strlen (reference[i]);
278      for (j = 0; j < len; j++)
279	{
280	  /* Ignore the terminating NUL characters at the end of every string in 'reference[]'.  */
281	  if (!broken && *p_input != reference[i][j])
282	    {
283	      *p_comparison = '\0';
284	      strcat (p_comparison, " >>> ");
285	      p_comparison += strlen (p_comparison);
286	      new_errors++;
287	      broken = 1;
288	    }
289	  *p_comparison = *p_input;
290	  p_comparison++;
291	  p_input++;
292	}
293      if (broken)
294	{
295	  *p_comparison = '\0';
296	  strcat (p_comparison, "expected:\n");
297	  strcat (p_comparison, reference[i]);
298	  p_comparison += strlen (p_comparison);
299	}
300    }
301  *p_comparison = '\0';
302  strcat (p_comparison, new_errors ? "failure\n\n" : "O.K.\n\n") ;
303  errors += new_errors;
304  return 0;
305}
306