1/* { dg-do run } */ 2/* { dg-options "-O2 -msse2" } */ 3/* { dg-require-effective-target sse2 } */ 4 5#include "sse2-check.h" 6 7#include <emmintrin.h> 8#include <string.h> 9 10#define SHIFT (4) 11 12typedef union { 13 __m128i v; 14 unsigned int s[4]; 15 unsigned short int t[8]; 16 unsigned long long u[2]; 17 unsigned char c[16]; 18}vecInLong; 19 20void sse2_tests (void) __attribute__((noinline)); 21void dump128_16 (char *, char *, vecInLong); 22void dump128_32 (char *, char *, vecInLong); 23void dump128_64 (char *, char *, vecInLong); 24void dump128_128 (char *, char *, vecInLong); 25int check (const char *, const char *[]); 26 27char buf[8000]; 28char comparison[8000]; 29static int errors = 0; 30 31vecInLong a128, b128, c128, d128, e128, f128; 32__m128i m128_16, m128_32, s128, m128_64, m128_128; 33__m64 m64_16, s64, m64_32, m64_64; 34 35const char *reference_sse2[] = { 36 "_mm_srai_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n", 37 "_mm_sra_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n", 38 "_mm_srai_epi32 00123456 00123456 00123456 00123456 \n", 39 "_mm_sra_epi32 00123456 00123456 00123456 00123456 \n", 40 "_mm_srli_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n", 41 "_mm_srl_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n", 42 "_mm_srli_epi32 00123456 00123456 00123456 00123456 \n", 43 "_mm_srl_epi32 00123456 00123456 00123456 00123456 \n", 44 "_mm_srli_epi64 00123456789abcde 00123456789abcde \n", 45 "_mm_srl_epi64 00123456789abcde 00123456789abcde \n", 46 "_mm_srli_si128 (byte shift) 00000000ffeeddccbbaa998877665544\n", 47 "_mm_slli_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n", 48 "_mm_sll_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n", 49 "_mm_slli_epi32 12345670 12345670 12345670 12345670 \n", 50 "_mm_sll_epi32 12345670 12345670 12345670 12345670 \n", 51 "_mm_slli_epi64 123456789abcdef0 123456789abcdef0 \n", 52 "_mm_sll_epi64 123456789abcdef0 123456789abcdef0 \n", 53 "_mm_sll_si128 (byte shift) bbaa9988776655443322110000000000\n", 54 "_mm_shuffle_epi32 ffeeddcc bbaa9988 77665544 33221100 \n", 55 "_mm_shuffelo_epi16 7766 5544 3322 1100 9988 bbaa ddcc ffee \n", 56 "_mm_shuffehi_epi16 1100 3322 5544 7766 ffee ddcc bbaa 9988 \n", 57 "" 58}; 59 60static void 61sse2_test (void) 62{ 63 a128.s[0] = 0x01234567; 64 a128.s[1] = 0x01234567; 65 a128.s[2] = 0x01234567; 66 a128.s[3] = 0x01234567; 67 68 m128_32 = a128.v; 69 70 d128.u[0] = 0x0123456789abcdefULL; 71 d128.u[1] = 0x0123456789abcdefULL; 72 73 m128_64 = d128.v; 74 75 /* This is the 128-bit constant 0x00112233445566778899aabbccddeeff, 76 expressed as two little-endian 64-bit words. */ 77 e128.u[0] = 0x7766554433221100ULL; 78 e128.u[1] = 0xffeeddccbbaa9988ULL; 79 80 f128.t[0] = 0x0123; 81 f128.t[1] = 0x0123; 82 f128.t[2] = 0x0123; 83 f128.t[3] = 0x0123; 84 f128.t[4] = 0x0123; 85 f128.t[5] = 0x0123; 86 f128.t[6] = 0x0123; 87 f128.t[7] = 0x0123; 88 89 m128_16 = f128.v; 90 91 m128_128 = e128.v; 92 93 b128.s[0] = SHIFT; 94 b128.s[1] = 0; 95 b128.s[2] = 0; 96 b128.s[3] = 0; 97 98 s128 = b128.v; 99 100 sse2_tests(); 101 check (buf, reference_sse2); 102#ifdef DEBUG 103 printf ("sse2 testing:\n"); 104 printf (buf); 105 printf ("\ncomparison:\n"); 106 printf (comparison); 107#endif 108 buf[0] = '\0'; 109 110 if (errors != 0) 111 abort (); 112} 113 114void __attribute__((noinline)) 115sse2_tests (void) 116{ 117 /* psraw */ 118 c128.v = _mm_srai_epi16 (m128_16, SHIFT); 119 dump128_16 (buf, "_mm_srai_epi16", c128); 120 c128.v = _mm_sra_epi16 (m128_16, s128); 121 dump128_16 (buf, "_mm_sra_epi16", c128); 122 123 /* psrad */ 124 c128.v = _mm_srai_epi32 (m128_32, SHIFT); 125 dump128_32 (buf, "_mm_srai_epi32", c128); 126 c128.v = _mm_sra_epi32 (m128_32, s128); 127 dump128_32 (buf, "_mm_sra_epi32", c128); 128 129 /* psrlw */ 130 c128.v = _mm_srli_epi16 (m128_16, SHIFT); 131 dump128_16 (buf, "_mm_srli_epi16", c128); 132 c128.v = _mm_srl_epi16 (m128_16, s128); 133 dump128_16 (buf, "_mm_srl_epi16", c128); 134 135 /* psrld */ 136 c128.v = _mm_srli_epi32 (m128_32, SHIFT); 137 dump128_32 (buf, "_mm_srli_epi32", c128); 138 c128.v = _mm_srl_epi32 (m128_32, s128); 139 dump128_32 (buf, "_mm_srl_epi32", c128); 140 141 /* psrlq */ 142 c128.v = _mm_srli_epi64 (m128_64, SHIFT); 143 dump128_64 (buf, "_mm_srli_epi64", c128); 144 c128.v = _mm_srl_epi64 (m128_64, s128); 145 dump128_64 (buf, "_mm_srl_epi64", c128); 146 147 /* psrldq */ 148 c128.v = _mm_srli_si128 (m128_128, SHIFT); 149 dump128_128 (buf, "_mm_srli_si128 (byte shift) ", c128); 150 151 /* psllw */ 152 c128.v = _mm_slli_epi16 (m128_16, SHIFT); 153 dump128_16 (buf, "_mm_slli_epi16", c128); 154 c128.v = _mm_sll_epi16 (m128_16, s128); 155 dump128_16 (buf, "_mm_sll_epi16", c128); 156 157 /* pslld */ 158 c128.v = _mm_slli_epi32 (m128_32, SHIFT); 159 dump128_32 (buf, "_mm_slli_epi32", c128); 160 c128.v = _mm_sll_epi32 (m128_32, s128); 161 dump128_32 (buf, "_mm_sll_epi32", c128); 162 163 /* psllq */ 164 c128.v = _mm_slli_epi64 (m128_64, SHIFT); 165 dump128_64 (buf, "_mm_slli_epi64", c128); 166 c128.v = _mm_sll_epi64 (m128_64, s128); 167 dump128_64 (buf, "_mm_sll_epi64", c128); 168 169 /* pslldq */ 170 c128.v = _mm_slli_si128 (m128_128, SHIFT); 171 dump128_128 (buf, "_mm_sll_si128 (byte shift)", c128); 172 173 /* Shuffle constant 0x1b == 0b_00_01_10_11, e.g. swap words: ABCD => DCBA. */ 174 175 /* pshufd */ 176 c128.v = _mm_shuffle_epi32 (m128_128, 0x1b); 177 dump128_32 (buf, "_mm_shuffle_epi32", c128); 178 179 /* pshuflw */ 180 c128.v = _mm_shufflelo_epi16 (m128_128, 0x1b); 181 dump128_16 (buf, "_mm_shuffelo_epi16", c128); 182 183 /* pshufhw */ 184 c128.v = _mm_shufflehi_epi16 (m128_128, 0x1b); 185 dump128_16 (buf, "_mm_shuffehi_epi16", c128); 186} 187 188void 189dump128_16 (char *buf, char *name, vecInLong x) 190{ 191 int i; 192 char *p = buf + strlen (buf); 193 194 sprintf (p, "%s ", name); 195 p += strlen (p); 196 197 for (i=0; i<8; i++) 198 { 199 sprintf (p, "%4.4x ", x.t[i]); 200 p += strlen (p); 201 } 202 strcat (p, "\n"); 203} 204 205void 206dump128_32 (char *buf, char *name, vecInLong x) 207{ 208 int i; 209 char *p = buf + strlen (buf); 210 211 sprintf (p, "%s ", name); 212 p += strlen (p); 213 214 for (i=0; i<4; i++) 215 { 216 sprintf (p, "%8.8x ", x.s[i]); 217 p += strlen (p); 218 } 219 strcat (p, "\n"); 220} 221 222void 223dump128_64 (char *buf, char *name, vecInLong x) 224{ 225 int i; 226 char *p = buf + strlen (buf); 227 228 sprintf (p, "%s ", name); 229 p += strlen (p); 230 231 for (i=0; i<2; i++) 232 { 233#if defined(_WIN32) && !defined(__CYGWIN__) 234 sprintf (p, "%16.16I64x ", x.u[i]); 235#else 236 sprintf (p, "%16.16llx ", x.u[i]); 237#endif 238 p += strlen (p); 239 } 240 strcat (p, "\n"); 241} 242 243void 244dump128_128 (char *buf, char *name, vecInLong x) 245{ 246 int i; 247 char *p = buf + strlen (buf); 248 249 sprintf (p, "%s ", name); 250 p += strlen (p); 251 252 for (i=15; i>=0; i--) 253 { 254 /* This is cheating; we don't have a 128-bit int format code. 255 Running the loop backwards to compensate for the 256 little-endian layout. */ 257 sprintf (p, "%2.2x", x.c[i]); 258 p += strlen (p); 259 } 260 strcat (p, "\n"); 261} 262 263int 264check (const char *input, const char *reference[]) 265{ 266 int broken, i, j, len; 267 const char *p_input; 268 char *p_comparison; 269 int new_errors = 0; 270 271 p_comparison = &comparison[0]; 272 p_input = input; 273 274 for (i = 0; *reference[i] != '\0'; i++) 275 { 276 broken = 0; 277 len = strlen (reference[i]); 278 for (j = 0; j < len; j++) 279 { 280 /* Ignore the terminating NUL characters at the end of every string in 'reference[]'. */ 281 if (!broken && *p_input != reference[i][j]) 282 { 283 *p_comparison = '\0'; 284 strcat (p_comparison, " >>> "); 285 p_comparison += strlen (p_comparison); 286 new_errors++; 287 broken = 1; 288 } 289 *p_comparison = *p_input; 290 p_comparison++; 291 p_input++; 292 } 293 if (broken) 294 { 295 *p_comparison = '\0'; 296 strcat (p_comparison, "expected:\n"); 297 strcat (p_comparison, reference[i]); 298 p_comparison += strlen (p_comparison); 299 } 300 } 301 *p_comparison = '\0'; 302 strcat (p_comparison, new_errors ? "failure\n\n" : "O.K.\n\n") ; 303 errors += new_errors; 304 return 0; 305} 306