1/* { dg-do run { target i?86-*-* x86_64-*-* } } */
2/* { dg-options "-O2 -msse2" } */
3#include <xmmintrin.h>
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7#include "../../gcc.dg/i386-cpuid.h"
8
/* Fallback definition of NOINLINE for builds that do not provide one:
   the GCC attribute that forbids inlining of the function it marks.  */
#if !defined (NOINLINE)
# define NOINLINE __attribute__ ((noinline))
#endif

/* Shift count shared by every shift test in this file.  It is a bit
   count for the per-lane shifts and a byte count for the whole-register
   _mm_srli_si128 / _mm_slli_si128 shifts.  */
#define SHIFT (4)
14
15typedef union {
16  __m128i v;
17  unsigned int s[4];
18  unsigned short int t[8];
19  unsigned long long u[2];
20  unsigned char c[16];
21}vecInLong;
22
23void sse2_tests (void) NOINLINE;
24void dump128_16 (char *, char *, vecInLong);
25void dump128_32 (char *, char *, vecInLong);
26void dump128_64 (char *, char *, vecInLong);
27void dump128_128 (char *, char *, vecInLong);
28int check (const char *, const char *[]);
29
30char buf[8000];
31char comparison[8000];
32static int errors = 0;
33
34vecInLong a128, b128, c128, d128, e128, f128;
35__m128i m128_16, m128_32, s128, m128_64, m128_128;
36__m64 m64_16, s64, m64_32, m64_64;
37
/* Expected output of sse2_tests (), one string per dumped result and in
   the same order as the calls there.  Each entry must match the text
   produced by the dump128_* helpers exactly, including the label and
   the trailing space/newline.  The list ends with an empty string.  */
const char *reference_sse2[] =
{
  /* Arithmetic right shifts.  */
  "_mm_srai_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_sra_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_srai_epi32 00123456 00123456 00123456 00123456 \n",
  "_mm_sra_epi32 00123456 00123456 00123456 00123456 \n",

  /* Logical right shifts.  */
  "_mm_srli_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_srl_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n",
  "_mm_srli_epi32 00123456 00123456 00123456 00123456 \n",
  "_mm_srl_epi32 00123456 00123456 00123456 00123456 \n",
  "_mm_srli_epi64 00123456789abcde 00123456789abcde \n",
  "_mm_srl_epi64 00123456789abcde 00123456789abcde \n",
  "_mm_srli_si128 (byte shift)  00000000ffeeddccbbaa998877665544\n",

  /* Left shifts.  */
  "_mm_slli_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",
  "_mm_sll_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n",
  "_mm_slli_epi32 12345670 12345670 12345670 12345670 \n",
  "_mm_sll_epi32 12345670 12345670 12345670 12345670 \n",
  "_mm_slli_epi64 123456789abcdef0 123456789abcdef0 \n",
  "_mm_sll_epi64 123456789abcdef0 123456789abcdef0 \n",
  "_mm_sll_si128 (byte shift) bbaa9988776655443322110000000000\n",

  /* Shuffles.  */
  "_mm_shuffle_epi32 ffeeddcc bbaa9988 77665544 33221100 \n",
  "_mm_shuffelo_epi16 7766 5544 3322 1100 9988 bbaa ddcc ffee \n",
  "_mm_shuffehi_epi16 1100 3322 5544 7766 ffee ddcc bbaa 9988 \n",

  /* Terminator.  */
  ""
};
62
63int main()
64{
65  unsigned long cpu_facilities;
66
67  cpu_facilities = i386_cpuid ();
68
69  if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
70      != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV))
71    /* If host has no vector support, pass.  */
72    exit (0);
73
74  a128.s[0] = 0x01234567;
75  a128.s[1] = 0x01234567;
76  a128.s[2] = 0x01234567;
77  a128.s[3] = 0x01234567;
78
79  m128_32 = a128.v;
80
81  d128.u[0] = 0x0123456789abcdefULL;
82  d128.u[1] = 0x0123456789abcdefULL;
83
84  m128_64 = d128.v;
85
86  /* This is the 128-bit constant 0x00112233445566778899aabbccddeeff,
87     expressed as two little-endian 64-bit words.  */
88  e128.u[0] = 0x7766554433221100ULL;
89  e128.u[1] = 0xffeeddccbbaa9988ULL;
90
91  f128.t[0] = 0x0123;
92  f128.t[1] = 0x0123;
93  f128.t[2] = 0x0123;
94  f128.t[3] = 0x0123;
95  f128.t[4] = 0x0123;
96  f128.t[5] = 0x0123;
97  f128.t[6] = 0x0123;
98  f128.t[7] = 0x0123;
99
100  m128_16 = f128.v;
101
102  m128_128 = e128.v;
103
104  b128.s[0] = SHIFT;
105  b128.s[1] = 0;
106  b128.s[2] = 0;
107  b128.s[3] = 0;
108
109  s128 = b128.v;
110
111  if (cpu_facilities & bit_SSE2)
112    {
113      sse2_tests();
114      check (buf, reference_sse2);
115#ifdef DEBUG
116      printf ("sse2 testing:\n");
117      printf (buf);
118      printf ("\ncomparison:\n");
119      printf (comparison);
120#endif
121      buf[0] = '\0';
122    }
123
124  if (errors != 0)
125    abort ();
126  exit (0);
127}
128
129void NOINLINE
130sse2_tests (void)
131{
132  /* psraw */
133  c128.v = _mm_srai_epi16 (m128_16, SHIFT);
134  dump128_16 (buf, "_mm_srai_epi16", c128);
135  c128.v = _mm_sra_epi16 (m128_16, s128);
136  dump128_16 (buf, "_mm_sra_epi16", c128);
137
138  /* psrad */
139  c128.v = _mm_srai_epi32 (m128_32, SHIFT);
140  dump128_32 (buf, "_mm_srai_epi32", c128);
141  c128.v = _mm_sra_epi32 (m128_32, s128);
142  dump128_32 (buf, "_mm_sra_epi32", c128);
143
144  /* psrlw */
145  c128.v = _mm_srli_epi16 (m128_16, SHIFT);
146  dump128_16 (buf, "_mm_srli_epi16", c128);
147  c128.v = _mm_srl_epi16 (m128_16, s128);
148  dump128_16 (buf, "_mm_srl_epi16", c128);
149
150  /* psrld */
151  c128.v = _mm_srli_epi32 (m128_32, SHIFT);
152  dump128_32 (buf, "_mm_srli_epi32", c128);
153  c128.v = _mm_srl_epi32 (m128_32, s128);
154  dump128_32 (buf, "_mm_srl_epi32", c128);
155
156  /* psrlq */
157  c128.v = _mm_srli_epi64 (m128_64, SHIFT);
158  dump128_64 (buf, "_mm_srli_epi64", c128);
159  c128.v = _mm_srl_epi64 (m128_64, s128);
160  dump128_64 (buf, "_mm_srl_epi64", c128);
161
162  /* psrldq */
163  c128.v = _mm_srli_si128 (m128_128, SHIFT);
164  dump128_128 (buf, "_mm_srli_si128 (byte shift) ", c128);
165
166  /* psllw */
167  c128.v = _mm_slli_epi16 (m128_16, SHIFT);
168  dump128_16 (buf, "_mm_slli_epi16", c128);
169  c128.v = _mm_sll_epi16 (m128_16, s128);
170  dump128_16 (buf, "_mm_sll_epi16", c128);
171
172  /* pslld */
173  c128.v = _mm_slli_epi32 (m128_32, SHIFT);
174  dump128_32 (buf, "_mm_slli_epi32", c128);
175  c128.v = _mm_sll_epi32 (m128_32, s128);
176  dump128_32 (buf, "_mm_sll_epi32", c128);
177
178  /* psllq */
179  c128.v = _mm_slli_epi64 (m128_64, SHIFT);
180  dump128_64 (buf, "_mm_slli_epi64", c128);
181  c128.v = _mm_sll_epi64 (m128_64, s128);
182  dump128_64 (buf, "_mm_sll_epi64", c128);
183
184  /* pslldq */
185  c128.v = _mm_slli_si128 (m128_128, SHIFT);
186  dump128_128 (buf, "_mm_sll_si128 (byte shift)", c128);
187
188  /* Shuffle constant 0x1b == 0b_00_01_10_11, e.g. swap words: ABCD => DCBA.  */
189
190  /* pshufd */
191  c128.v = _mm_shuffle_epi32 (m128_128, 0x1b);
192  dump128_32 (buf, "_mm_shuffle_epi32", c128);
193
194  /* pshuflw */
195  c128.v = _mm_shufflelo_epi16 (m128_128, 0x1b);
196  dump128_16 (buf, "_mm_shuffelo_epi16", c128);
197
198  /* pshufhw */
199  c128.v = _mm_shufflehi_epi16 (m128_128, 0x1b);
200  dump128_16 (buf, "_mm_shuffehi_epi16", c128);
201}
202
203void
204dump128_16 (char *buf, char *name, vecInLong x)
205{
206  int i;
207  char *p = buf + strlen (buf);
208
209  sprintf (p, "%s ", name);
210  p += strlen (p);
211
212  for (i=0; i<8; i++)
213    {
214      sprintf (p, "%4.4x ", x.t[i]);
215      p += strlen (p);
216    }
217  strcat (p, "\n");
218}
219
220void
221dump128_32 (char *buf, char *name, vecInLong x)
222{
223  int i;
224  char *p = buf + strlen (buf);
225
226  sprintf (p, "%s ", name);
227  p += strlen (p);
228
229  for (i=0; i<4; i++)
230    {
231      sprintf (p, "%8.8x ", x.s[i]);
232      p += strlen (p);
233    }
234  strcat (p, "\n");
235}
236
237void
238dump128_64 (char *buf, char *name, vecInLong x)
239{
240  int i;
241  char *p = buf + strlen (buf);
242
243  sprintf (p, "%s ", name);
244  p += strlen (p);
245
246  for (i=0; i<2; i++)
247    {
248      sprintf (p, "%16.16llx ", x.u[i]);
249      p += strlen (p);
250    }
251  strcat (p, "\n");
252}
253
254void
255dump128_128 (char *buf, char *name, vecInLong x)
256{
257  int i;
258  char *p = buf + strlen (buf);
259
260  sprintf (p, "%s ", name);
261  p += strlen (p);
262
263  for (i=15; i>=0; i--)
264    {
265      /* This is cheating; we don't have a 128-bit int format code.
266	 Running the loop backwards to compensate for the
267	 little-endian layout. */
268      sprintf (p, "%2.2x", x.c[i]);
269      p += strlen (p);
270    }
271  strcat (p, "\n");
272}
273
274int
275check (const char *input, const char *reference[])
276{
277  int broken, i, j, len;
278  const char *p_input;
279  char *p_comparison;
280  int new_errors = 0;
281
282  p_comparison = &comparison[0];
283  p_input = input;
284
285  for (i = 0; *reference[i] != '\0'; i++)
286    {
287      broken = 0;
288      len = strlen (reference[i]);
289      for (j = 0; j < len; j++)
290	{
291	  /* Ignore the terminating NUL characters at the end of every string in 'reference[]'.  */
292	  if (!broken && *p_input != reference[i][j])
293	    {
294	      *p_comparison = '\0';
295	      strcat (p_comparison, " >>> ");
296	      p_comparison += strlen (p_comparison);
297	      new_errors++;
298	      broken = 1;
299	    }
300	  *p_comparison = *p_input;
301	  p_comparison++;
302	  p_input++;
303	}
304      if (broken)
305	{
306	  *p_comparison = '\0';
307	  strcat (p_comparison, "expected:\n");
308	  strcat (p_comparison, reference[i]);
309	  p_comparison += strlen (p_comparison);
310	}
311    }
312  *p_comparison = '\0';
313  strcat (p_comparison, new_errors ? "failure\n\n" : "O.K.\n\n") ;
314  errors += new_errors;
315  return 0;
316}
317