1262182Semaste/* { dg-do run } */ 2262182Semaste/* { dg-require-effective-target xop } */ 3262182Semaste/* { dg-options "-O2 -mxop" } */ 4262182Semaste 5262182Semaste#include "xop-check.h" 6262182Semaste 7262182Semaste#include <x86intrin.h> 8262182Semaste#include <string.h> 9262182Semaste 10262182Semaste#define NUM 10 11262182Semaste 12262182Semasteunion 13262182Semaste{ 14262182Semaste __m128i x[NUM]; 15262182Semaste unsigned char ssi[NUM * 16]; 16262182Semaste unsigned short si[NUM * 8]; 17262182Semaste unsigned int li[NUM * 4]; 18262182Semaste unsigned long long lli[NUM * 2]; 19262182Semaste} dst, res, src1; 20262182Semaste 21262182Semastestatic void 22262182Semasteinit_byte () 23262182Semaste{ 24262182Semaste int i; 25262182Semaste for (i=0; i < NUM * 16; i++) 26262182Semaste src1.ssi[i] = i; 27262182Semaste} 28262182Semaste 29262182Semastestatic void 30262182Semasteinit_word () 31262182Semaste{ 32262182Semaste int i; 33262182Semaste for (i=0; i < NUM * 8; i++) 34262182Semaste src1.si[i] = i; 35262182Semaste} 36262182Semaste 37262182Semaste 38262182Semastestatic void 39262182Semasteinit_dword () 40262182Semaste{ 41262182Semaste int i; 42262182Semaste for (i=0; i < NUM * 4; i++) 43262182Semaste src1.li[i] = i; 44262182Semaste} 45262182Semaste 46262182Semastestatic int 47262182Semastecheck_byte2word () 48262182Semaste{ 49262182Semaste int i, j, s, t, check_fails = 0; 50262182Semaste for (i = 0; i < NUM * 16; i = i + 16) 51262182Semaste { 52262182Semaste for (j = 0; j < 8; j++) 53262182Semaste { 54262182Semaste t = i + (2 * j); 55262182Semaste s = (i / 2) + j; 56262182Semaste res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ; 57262182Semaste if (res.si[s] != dst.si[s]) 58262182Semaste check_fails++; 59262182Semaste } 60262182Semaste } 61262182Semaste} 62262182Semaste 63262182Semastestatic int 64262182Semastecheck_byte2dword () 65262182Semaste{ 66262182Semaste int i, j, s, t, check_fails = 0; 67262182Semaste for (i = 0; i < NUM * 16; i = i + 16) 68262182Semaste { 69262182Semaste for (j = 0; j < 4; j++) 70262182Semaste { 71262182Semaste t = i + (4 * j); 72262182Semaste s = (i / 4) + j; 73262182Semaste res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] 74262182Semaste + src1.ssi[t + 3]); 75262182Semaste if (res.li[s] != dst.li[s]) 76262182Semaste check_fails++; 77262182Semaste } 78262182Semaste } 79262182Semaste return check_fails++; 80262182Semaste} 81262182Semaste 82262182Semastestatic int 83262182Semastecheck_byte2qword () 84262182Semaste{ 85262182Semaste int i, j, s, t, check_fails = 0; 86262182Semaste for (i = 0; i < NUM * 16; i = i + 16) 87262182Semaste { 88262182Semaste for (j = 0; j < 2; j++) 89262182Semaste { 90262182Semaste t = i + (8 * j); 91262182Semaste s = (i / 8) + j; 92262182Semaste res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2] 93262182Semaste + src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5]) 94262182Semaste + (src1.ssi[t + 6] + src1.ssi[t + 7])); 95262182Semaste if (res.lli[s] != dst.lli[s]) 96262182Semaste check_fails++; 97262182Semaste } 98262182Semaste } 99262182Semaste return check_fails++; 100262182Semaste} 101262182Semaste 102262182Semastestatic int 103262182Semastecheck_word2dword () 104262182Semaste{ 105262182Semaste int i, j, s, t, check_fails = 0; 106262182Semaste for (i = 0; i < (NUM * 8); i = i + 8) 107262182Semaste { 108262182Semaste for (j = 0; j < 4; j++) 109262182Semaste { 110262182Semaste t = i + (2 * j); 111262182Semaste s = (i / 2) + j; 112262182Semaste res.li[s] = src1.si[t] + src1.si[t + 1] ; 113262182Semaste if (res.li[s] != dst.li[s]) 114262182Semaste check_fails++; 115262182Semaste } 116262182Semaste } 117262182Semaste} 118262182Semaste 119262182Semastestatic int 120262182Semastecheck_word2qword () 121262182Semaste{ 122262182Semaste int i, j, s, t, check_fails = 0; 123262182Semaste for (i = 0; i < NUM * 8; i = i + 8) 124262182Semaste { 125262182Semaste for (j = 0; j < 2; j++) 126262182Semaste { 127262182Semaste t = i + (4 * j); 128262182Semaste s = (i / 4) + j; 129262182Semaste res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2] 130262182Semaste + src1.si[t + 3]); 131262182Semaste if (res.lli[s] != dst.lli[s]) 132262182Semaste check_fails++; 133262182Semaste } 134262182Semaste } 135262182Semaste return check_fails++; 136262182Semaste} 137262182Semaste 138262182Semastestatic int 139262182Semastecheck_dword2qword () 140262182Semaste{ 141262182Semaste int i, j, s, t, check_fails = 0; 142262182Semaste for (i = 0; i < (NUM * 4); i = i + 4) 143262182Semaste { 144262182Semaste for (j = 0; j < 2; j++) 145262182Semaste { 146262182Semaste t = i + (2 * j); 147262182Semaste s = (i / 2) + j; 148262182Semaste res.lli[s] = src1.li[t] + src1.li[t + 1] ; 149262182Semaste if (res.lli[s] != dst.lli[s]) 150262182Semaste check_fails++; 151262182Semaste } 152262182Semaste } 153262182Semaste} 154262182Semaste 155262182Semastestatic void 156262182Semastexop_test (void) 157262182Semaste{ 158262182Semaste int i; 159262182Semaste 160262182Semaste /* Check haddubw */ 161262182Semaste init_byte (); 162262182Semaste 163262182Semaste for (i = 0; i < NUM; i++) 164262182Semaste dst.x[i] = _mm_haddw_epu8 (src1.x[i]); 165262182Semaste 166262182Semaste if (check_byte2word()) 167262182Semaste abort (); 168262182Semaste 169262182Semaste /* Check haddubd */ 170262182Semaste for (i = 0; i < (NUM ); i++) 171262182Semaste dst.x[i] = _mm_haddd_epu8 (src1.x[i]); 172262182Semaste 173262182Semaste if (check_byte2dword()) 174262182Semaste abort (); 175262182Semaste 176262182Semaste /* Check haddubq */ 177262182Semaste for (i = 0; i < NUM; i++) 178262182Semaste dst.x[i] = _mm_haddq_epu8 (src1.x[i]); 179262182Semaste 180262182Semaste if (check_byte2qword()) 181262182Semaste abort (); 182262182Semaste 183262182Semaste /* Check hadduwd */ 184262182Semaste init_word (); 185262182Semaste 186262182Semaste for (i = 0; i < (NUM ); i++) 187262182Semaste dst.x[i] = _mm_haddd_epu16 (src1.x[i]); 188262182Semaste 189262182Semaste if (check_word2dword()) 190262182Semaste abort (); 191262182Semaste 192262182Semaste /* Check haddbuwq */ 193262182Semaste 194262182Semaste for (i = 0; i < NUM; i++) 195262182Semaste dst.x[i] = _mm_haddq_epu16 (src1.x[i]); 196262182Semaste 197262182Semaste if (check_word2qword()) 198262182Semaste abort (); 199262182Semaste 200262182Semaste /* Check hadudq */ 201262182Semaste init_dword (); 202262182Semaste for (i = 0; i < NUM; i++) 203262182Semaste dst.x[i] = _mm_haddq_epu32 (src1.x[i]); 204262182Semaste 205262182Semaste if (check_dword2qword()) 206262182Semaste abort (); 207262182Semaste} 208262182Semaste