1/* { dg-do run } */ 2/* { dg-require-effective-target xop } */ 3/* { dg-options "-O2 -mxop" } */ 4 5#include "xop-check.h" 6 7#include <x86intrin.h> 8#include <string.h> 9 10#define NUM 10 11 12union 13{ 14 __m128i x[NUM]; 15 signed char ssi[NUM * 16]; 16 short si[NUM * 8]; 17 int li[NUM * 4]; 18 long long lli[NUM * 2]; 19} dst, res, src1; 20 21static void 22init_sbyte () 23{ 24 int i; 25 for (i=0; i < NUM * 16; i++) 26 src1.ssi[i] = i; 27} 28 29static void 30init_sword () 31{ 32 int i; 33 for (i=0; i < NUM * 8; i++) 34 src1.si[i] = i; 35} 36 37 38static void 39init_sdword () 40{ 41 int i; 42 for (i=0; i < NUM * 4; i++) 43 src1.li[i] = i; 44} 45 46static int 47check_sbyte2word () 48{ 49 int i, j, s, t, check_fails = 0; 50 for (i = 0; i < NUM * 16; i = i + 16) 51 { 52 for (j = 0; j < 8; j++) 53 { 54 t = i + (2 * j); 55 s = (i / 2) + j; 56 res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ; 57 if (res.si[s] != dst.si[s]) 58 check_fails++; 59 } 60 } 61} 62 63static int 64check_sbyte2dword () 65{ 66 int i, j, s, t, check_fails = 0; 67 for (i = 0; i < NUM * 16; i = i + 16) 68 { 69 for (j = 0; j < 4; j++) 70 { 71 t = i + (4 * j); 72 s = (i / 4) + j; 73 res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] 74 + src1.ssi[t + 3]); 75 if (res.li[s] != dst.li[s]) 76 check_fails++; 77 } 78 } 79 return check_fails++; 80} 81 82static int 83check_sbyte2qword () 84{ 85 int i, j, s, t, check_fails = 0; 86 for (i = 0; i < NUM * 16; i = i + 16) 87 { 88 for (j = 0; j < 2; j++) 89 { 90 t = i + (8 * j); 91 s = (i / 8) + j; 92 res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] 93 + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5]) 94 + (src1.ssi[t + 6] + src1.ssi[t + 7])); 95 if (res.lli[s] != dst.lli[s]) 96 check_fails++; 97 } 98 } 99 return check_fails++; 100} 101 102static int 103check_sword2dword () 104{ 105 int i, j, s, t, check_fails = 0; 106 for (i = 0; i < (NUM * 8); i = i + 8) 107 { 108 for (j = 0; j < 4; j++) 109 { 110 t = i + (2 * j); 111 s = (i / 2) + j; 112 res.li[s] = src1.si[t] + src1.si[t + 1] ; 113 if (res.li[s] != dst.li[s]) 114 check_fails++; 115 } 116 } 117} 118 119static int 120check_sword2qword () 121{ 122 int i, j, s, t, check_fails = 0; 123 for (i = 0; i < NUM * 8; i = i + 8) 124 { 125 for (j = 0; j < 2; j++) 126 { 127 t = i + (4 * j); 128 s = (i / 4) + j; 129 res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2] 130 + src1.si[t + 3]); 131 if (res.lli[s] != dst.lli[s]) 132 check_fails++; 133 } 134 } 135 return check_fails++; 136} 137 138static int 139check_dword2qword () 140{ 141 int i, j, s, t, check_fails = 0; 142 for (i = 0; i < (NUM * 4); i = i + 4) 143 { 144 for (j = 0; j < 2; j++) 145 { 146 t = i + (2 * j); 147 s = (i / 2) + j; 148 res.lli[s] = src1.li[t] + src1.li[t + 1] ; 149 if (res.lli[s] != dst.lli[s]) 150 check_fails++; 151 } 152 } 153} 154 155static void 156xop_test (void) 157{ 158 int i; 159 160 init_sbyte (); 161 162 for (i = 0; i < NUM; i++) 163 dst.x[i] = _mm_haddw_epi8 (src1.x[i]); 164 165 if (check_sbyte2word()) 166 abort (); 167 168 169 for (i = 0; i < (NUM ); i++) 170 dst.x[i] = _mm_haddd_epi8 (src1.x[i]); 171 172 if (check_sbyte2dword()) 173 abort (); 174 175 176 for (i = 0; i < NUM; i++) 177 dst.x[i] = _mm_haddq_epi8 (src1.x[i]); 178 179 if (check_sbyte2qword()) 180 abort (); 181 182 183 init_sword (); 184 185 for (i = 0; i < (NUM ); i++) 186 dst.x[i] = _mm_haddd_epi16 (src1.x[i]); 187 188 if (check_sword2dword()) 189 abort (); 190 191 for (i = 0; i < NUM; i++) 192 dst.x[i] = _mm_haddq_epi16 (src1.x[i]); 193 194 if (check_sword2qword()) 195 abort (); 196 197 198 init_sdword (); 199 200 for (i = 0; i < NUM; i++) 201 dst.x[i] = _mm_haddq_epi32 (src1.x[i]); 202 203 if (check_dword2qword()) 204 abort (); 205 206} 207