1/* { dg-do run } */ 2/* { dg-require-effective-target ssse3 } */ 3/* { dg-options "-O2 -fno-strict-aliasing -mssse3" } */ 4 5#ifndef CHECK_H 6#define CHECK_H "ssse3-check.h" 7#endif 8 9#ifndef TEST 10#define TEST ssse3_test 11#endif 12 13#include CHECK_H 14 15#include "ssse3-vals.h" 16 17#include <tmmintrin.h> 18#include <string.h> 19 20#ifndef __AVX__ 21/* Test the 64-bit form */ 22static void 23ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r) 24{ 25 __m64 t1 = *(__m64 *) i1; 26 __m64 t2 = *(__m64 *) i2; 27 28 switch (imm) 29 { 30 case 0: 31 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 0); 32 break; 33 case 1: 34 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 1); 35 break; 36 case 2: 37 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 2); 38 break; 39 case 3: 40 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 3); 41 break; 42 case 4: 43 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 4); 44 break; 45 case 5: 46 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 5); 47 break; 48 case 6: 49 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 6); 50 break; 51 case 7: 52 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 7); 53 break; 54 case 8: 55 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 8); 56 break; 57 case 9: 58 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 9); 59 break; 60 case 10: 61 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 10); 62 break; 63 case 11: 64 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 11); 65 break; 66 case 12: 67 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 12); 68 break; 69 case 13: 70 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 13); 71 break; 72 case 14: 73 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 14); 74 break; 75 case 15: 76 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 15); 77 break; 78 default: 79 *(__m64 *) r = _mm_alignr_pi8 (t1, t2, 16); 80 break; 81 } 82 83 _mm_empty(); 84} 85#endif 86 87/* Test the 128-bit form */ 88static void 89ssse3_test_palignr128 (int *i1, int *i2, unsigned int imm, int *r) 90{ 91 /* Assumes incoming pointers are 16-byte aligned */ 92 __m128i t1 = *(__m128i *) i1; 93 __m128i t2 = *(__m128i *) i2; 94 95 switch (imm) 96 { 97 case 0: 98 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 0); 99 break; 100 case 1: 101 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 1); 102 break; 103 case 2: 104 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 2); 105 break; 106 case 3: 107 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 3); 108 break; 109 case 4: 110 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 4); 111 break; 112 case 5: 113 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 5); 114 break; 115 case 6: 116 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 6); 117 break; 118 case 7: 119 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 7); 120 break; 121 case 8: 122 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 8); 123 break; 124 case 9: 125 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 9); 126 break; 127 case 10: 128 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 10); 129 break; 130 case 11: 131 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 11); 132 break; 133 case 12: 134 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 12); 135 break; 136 case 13: 137 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 13); 138 break; 139 case 14: 140 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 14); 141 break; 142 case 15: 143 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 15); 144 break; 145 case 16: 146 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 16); 147 break; 148 case 17: 149 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 17); 150 break; 151 case 18: 152 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 18); 153 break; 154 case 19: 155 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 19); 156 break; 157 case 20: 158 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 20); 159 break; 160 case 21: 161 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 21); 162 break; 163 case 22: 164 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 22); 165 break; 166 case 23: 167 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 23); 168 break; 169 case 24: 170 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 24); 171 break; 172 case 25: 173 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 25); 174 break; 175 case 26: 176 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 26); 177 break; 178 case 27: 179 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 27); 180 break; 181 case 28: 182 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 28); 183 break; 184 case 29: 185 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 29); 186 break; 187 case 30: 188 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 30); 189 break; 190 case 31: 191 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 31); 192 break; 193 default: 194 *(__m128i *) r = _mm_alignr_epi8 (t1, t2, 32); 195 break; 196 } 197} 198 199/* Routine to manually compute the results */ 200static void 201compute_correct_result_128 (int *i1, int *i2, unsigned int imm, int *r) 202{ 203 char buf [32]; 204 char *bout = (char *) r; 205 int i; 206 207 memcpy (&buf[0], i2, 16); 208 memcpy (&buf[16], i1, 16); 209 210 for (i = 0; i < 16; i++) 211 if (imm >= 32 || imm + i >= 32) 212 bout[i] = 0; 213 else 214 bout[i] = buf[imm + i]; 215} 216 217#ifndef __AVX__ 218static void 219compute_correct_result_64 (int *i1, int *i2, unsigned int imm, int *r) 220{ 221 char buf [16]; 222 char *bout = (char *)r; 223 int i; 224 225 /* Handle the first half */ 226 memcpy (&buf[0], i2, 8); 227 memcpy (&buf[8], i1, 8); 228 229 for (i = 0; i < 8; i++) 230 if (imm >= 16 || imm + i >= 16) 231 bout[i] = 0; 232 else 233 bout[i] = buf[imm + i]; 234 235 /* Handle the second half */ 236 memcpy (&buf[0], &i2[2], 8); 237 memcpy (&buf[8], &i1[2], 8); 238 239 for (i = 0; i < 8; i++) 240 if (imm >= 16 || imm + i >= 16) 241 bout[i + 8] = 0; 242 else 243 bout[i + 8] = buf[imm + i]; 244} 245#endif 246 247static void 248TEST (void) 249{ 250 int i; 251 int r [4] __attribute__ ((aligned(16))); 252 int ck [4]; 253 unsigned int imm; 254 int fail = 0; 255 256 for (i = 0; i < 256; i += 8) 257 for (imm = 0; imm < 100; imm++) 258 { 259#ifndef __AVX__ 260 /* Manually compute the result */ 261 compute_correct_result_64 (&vals[i + 0], &vals[i + 4], imm, ck); 262 263 /* Run the 64-bit tests */ 264 ssse3_test_palignr (&vals[i + 0], &vals[i + 4], imm, &r[0]); 265 ssse3_test_palignr (&vals[i + 2], &vals[i + 6], imm, &r[2]); 266 fail += chk_128 (ck, r); 267#endif 268 269 /* Recompute the results for 128-bits */ 270 compute_correct_result_128 (&vals[i + 0], &vals[i + 4], imm, ck); 271 272 /* Run the 128-bit tests */ 273 ssse3_test_palignr128 (&vals[i + 0], &vals[i + 4], imm, r); 274 fail += chk_128 (ck, r); 275 } 276 277 if (fail != 0) 278 abort (); 279} 280