1/* { dg-do run { target i?86-*-* x86_64-*-* } } */ 2/* { dg-options "-O2 -msse2" } */ 3#include <xmmintrin.h> 4#include <stdio.h> 5#include <stdlib.h> 6#include <string.h> 7#include "../../gcc.dg/i386-cpuid.h" 8 9#ifndef NOINLINE 10#define NOINLINE __attribute__ ((noinline)) 11#endif 12 13#define SHIFT (4) 14 15typedef union { 16 __m128i v; 17 unsigned int s[4]; 18 unsigned short int t[8]; 19 unsigned long long u[2]; 20 unsigned char c[16]; 21}vecInLong; 22 23void sse2_tests (void) NOINLINE; 24void dump128_16 (char *, char *, vecInLong); 25void dump128_32 (char *, char *, vecInLong); 26void dump128_64 (char *, char *, vecInLong); 27void dump128_128 (char *, char *, vecInLong); 28int check (const char *, const char *[]); 29 30char buf[8000]; 31char comparison[8000]; 32static int errors = 0; 33 34vecInLong a128, b128, c128, d128, e128, f128; 35__m128i m128_16, m128_32, s128, m128_64, m128_128; 36__m64 m64_16, s64, m64_32, m64_64; 37 38const char *reference_sse2[] = { 39 "_mm_srai_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n", 40 "_mm_sra_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n", 41 "_mm_srai_epi32 00123456 00123456 00123456 00123456 \n", 42 "_mm_sra_epi32 00123456 00123456 00123456 00123456 \n", 43 "_mm_srli_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n", 44 "_mm_srl_epi16 0012 0012 0012 0012 0012 0012 0012 0012 \n", 45 "_mm_srli_epi32 00123456 00123456 00123456 00123456 \n", 46 "_mm_srl_epi32 00123456 00123456 00123456 00123456 \n", 47 "_mm_srli_epi64 00123456789abcde 00123456789abcde \n", 48 "_mm_srl_epi64 00123456789abcde 00123456789abcde \n", 49 "_mm_srli_si128 (byte shift) 00000000ffeeddccbbaa998877665544\n", 50 "_mm_slli_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n", 51 "_mm_sll_epi16 1230 1230 1230 1230 1230 1230 1230 1230 \n", 52 "_mm_slli_epi32 12345670 12345670 12345670 12345670 \n", 53 "_mm_sll_epi32 12345670 12345670 12345670 12345670 \n", 54 "_mm_slli_epi64 123456789abcdef0 123456789abcdef0 \n", 55 "_mm_sll_epi64 123456789abcdef0 123456789abcdef0 \n", 56 "_mm_sll_si128 (byte shift) bbaa9988776655443322110000000000\n", 57 "_mm_shuffle_epi32 ffeeddcc bbaa9988 77665544 33221100 \n", 58 "_mm_shuffelo_epi16 7766 5544 3322 1100 9988 bbaa ddcc ffee \n", 59 "_mm_shuffehi_epi16 1100 3322 5544 7766 ffee ddcc bbaa 9988 \n", 60 "" 61}; 62 63int main() 64{ 65 unsigned long cpu_facilities; 66 67 cpu_facilities = i386_cpuid (); 68 69 if ((cpu_facilities & (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV)) 70 != (bit_MMX | bit_SSE | bit_SSE2 | bit_CMOV)) 71 /* If host has no vector support, pass. */ 72 exit (0); 73 74 a128.s[0] = 0x01234567; 75 a128.s[1] = 0x01234567; 76 a128.s[2] = 0x01234567; 77 a128.s[3] = 0x01234567; 78 79 m128_32 = a128.v; 80 81 d128.u[0] = 0x0123456789abcdefULL; 82 d128.u[1] = 0x0123456789abcdefULL; 83 84 m128_64 = d128.v; 85 86 /* This is the 128-bit constant 0x00112233445566778899aabbccddeeff, 87 expressed as two little-endian 64-bit words. */ 88 e128.u[0] = 0x7766554433221100ULL; 89 e128.u[1] = 0xffeeddccbbaa9988ULL; 90 91 f128.t[0] = 0x0123; 92 f128.t[1] = 0x0123; 93 f128.t[2] = 0x0123; 94 f128.t[3] = 0x0123; 95 f128.t[4] = 0x0123; 96 f128.t[5] = 0x0123; 97 f128.t[6] = 0x0123; 98 f128.t[7] = 0x0123; 99 100 m128_16 = f128.v; 101 102 m128_128 = e128.v; 103 104 b128.s[0] = SHIFT; 105 b128.s[1] = 0; 106 b128.s[2] = 0; 107 b128.s[3] = 0; 108 109 s128 = b128.v; 110 111 if (cpu_facilities & bit_SSE2) 112 { 113 sse2_tests(); 114 check (buf, reference_sse2); 115#ifdef DEBUG 116 printf ("sse2 testing:\n"); 117 printf (buf); 118 printf ("\ncomparison:\n"); 119 printf (comparison); 120#endif 121 buf[0] = '\0'; 122 } 123 124 if (errors != 0) 125 abort (); 126 exit (0); 127} 128 129void NOINLINE 130sse2_tests (void) 131{ 132 /* psraw */ 133 c128.v = _mm_srai_epi16 (m128_16, SHIFT); 134 dump128_16 (buf, "_mm_srai_epi16", c128); 135 c128.v = _mm_sra_epi16 (m128_16, s128); 136 dump128_16 (buf, "_mm_sra_epi16", c128); 137 138 /* psrad */ 139 c128.v = _mm_srai_epi32 (m128_32, SHIFT); 140 dump128_32 (buf, "_mm_srai_epi32", c128); 141 c128.v = _mm_sra_epi32 (m128_32, s128); 142 dump128_32 (buf, "_mm_sra_epi32", c128); 143 144 /* psrlw */ 145 c128.v = _mm_srli_epi16 (m128_16, SHIFT); 146 dump128_16 (buf, "_mm_srli_epi16", c128); 147 c128.v = _mm_srl_epi16 (m128_16, s128); 148 dump128_16 (buf, "_mm_srl_epi16", c128); 149 150 /* psrld */ 151 c128.v = _mm_srli_epi32 (m128_32, SHIFT); 152 dump128_32 (buf, "_mm_srli_epi32", c128); 153 c128.v = _mm_srl_epi32 (m128_32, s128); 154 dump128_32 (buf, "_mm_srl_epi32", c128); 155 156 /* psrlq */ 157 c128.v = _mm_srli_epi64 (m128_64, SHIFT); 158 dump128_64 (buf, "_mm_srli_epi64", c128); 159 c128.v = _mm_srl_epi64 (m128_64, s128); 160 dump128_64 (buf, "_mm_srl_epi64", c128); 161 162 /* psrldq */ 163 c128.v = _mm_srli_si128 (m128_128, SHIFT); 164 dump128_128 (buf, "_mm_srli_si128 (byte shift) ", c128); 165 166 /* psllw */ 167 c128.v = _mm_slli_epi16 (m128_16, SHIFT); 168 dump128_16 (buf, "_mm_slli_epi16", c128); 169 c128.v = _mm_sll_epi16 (m128_16, s128); 170 dump128_16 (buf, "_mm_sll_epi16", c128); 171 172 /* pslld */ 173 c128.v = _mm_slli_epi32 (m128_32, SHIFT); 174 dump128_32 (buf, "_mm_slli_epi32", c128); 175 c128.v = _mm_sll_epi32 (m128_32, s128); 176 dump128_32 (buf, "_mm_sll_epi32", c128); 177 178 /* psllq */ 179 c128.v = _mm_slli_epi64 (m128_64, SHIFT); 180 dump128_64 (buf, "_mm_slli_epi64", c128); 181 c128.v = _mm_sll_epi64 (m128_64, s128); 182 dump128_64 (buf, "_mm_sll_epi64", c128); 183 184 /* pslldq */ 185 c128.v = _mm_slli_si128 (m128_128, SHIFT); 186 dump128_128 (buf, "_mm_sll_si128 (byte shift)", c128); 187 188 /* Shuffle constant 0x1b == 0b_00_01_10_11, e.g. swap words: ABCD => DCBA. */ 189 190 /* pshufd */ 191 c128.v = _mm_shuffle_epi32 (m128_128, 0x1b); 192 dump128_32 (buf, "_mm_shuffle_epi32", c128); 193 194 /* pshuflw */ 195 c128.v = _mm_shufflelo_epi16 (m128_128, 0x1b); 196 dump128_16 (buf, "_mm_shuffelo_epi16", c128); 197 198 /* pshufhw */ 199 c128.v = _mm_shufflehi_epi16 (m128_128, 0x1b); 200 dump128_16 (buf, "_mm_shuffehi_epi16", c128); 201} 202 203void 204dump128_16 (char *buf, char *name, vecInLong x) 205{ 206 int i; 207 char *p = buf + strlen (buf); 208 209 sprintf (p, "%s ", name); 210 p += strlen (p); 211 212 for (i=0; i<8; i++) 213 { 214 sprintf (p, "%4.4x ", x.t[i]); 215 p += strlen (p); 216 } 217 strcat (p, "\n"); 218} 219 220void 221dump128_32 (char *buf, char *name, vecInLong x) 222{ 223 int i; 224 char *p = buf + strlen (buf); 225 226 sprintf (p, "%s ", name); 227 p += strlen (p); 228 229 for (i=0; i<4; i++) 230 { 231 sprintf (p, "%8.8x ", x.s[i]); 232 p += strlen (p); 233 } 234 strcat (p, "\n"); 235} 236 237void 238dump128_64 (char *buf, char *name, vecInLong x) 239{ 240 int i; 241 char *p = buf + strlen (buf); 242 243 sprintf (p, "%s ", name); 244 p += strlen (p); 245 246 for (i=0; i<2; i++) 247 { 248 sprintf (p, "%16.16llx ", x.u[i]); 249 p += strlen (p); 250 } 251 strcat (p, "\n"); 252} 253 254void 255dump128_128 (char *buf, char *name, vecInLong x) 256{ 257 int i; 258 char *p = buf + strlen (buf); 259 260 sprintf (p, "%s ", name); 261 p += strlen (p); 262 263 for (i=15; i>=0; i--) 264 { 265 /* This is cheating; we don't have a 128-bit int format code. 266 Running the loop backwards to compensate for the 267 little-endian layout. */ 268 sprintf (p, "%2.2x", x.c[i]); 269 p += strlen (p); 270 } 271 strcat (p, "\n"); 272} 273 274int 275check (const char *input, const char *reference[]) 276{ 277 int broken, i, j, len; 278 const char *p_input; 279 char *p_comparison; 280 int new_errors = 0; 281 282 p_comparison = &comparison[0]; 283 p_input = input; 284 285 for (i = 0; *reference[i] != '\0'; i++) 286 { 287 broken = 0; 288 len = strlen (reference[i]); 289 for (j = 0; j < len; j++) 290 { 291 /* Ignore the terminating NUL characters at the end of every string in 'reference[]'. */ 292 if (!broken && *p_input != reference[i][j]) 293 { 294 *p_comparison = '\0'; 295 strcat (p_comparison, " >>> "); 296 p_comparison += strlen (p_comparison); 297 new_errors++; 298 broken = 1; 299 } 300 *p_comparison = *p_input; 301 p_comparison++; 302 p_input++; 303 } 304 if (broken) 305 { 306 *p_comparison = '\0'; 307 strcat (p_comparison, "expected:\n"); 308 strcat (p_comparison, reference[i]); 309 p_comparison += strlen (p_comparison); 310 } 311 } 312 *p_comparison = '\0'; 313 strcat (p_comparison, new_errors ? "failure\n\n" : "O.K.\n\n") ; 314 errors += new_errors; 315 return 0; 316} 317