1/* { dg-do run } */ 2/* { dg-require-effective-target sse4 } */ 3/* { dg-options "-O2 -msse4.1" } */ 4 5#ifndef CHECK_H 6#define CHECK_H "sse4_1-check.h" 7#endif 8 9#ifndef TEST 10#define TEST sse4_1_test 11#endif 12 13#include CHECK_H 14 15#include <smmintrin.h> 16#include <string.h> 17 18#define msk0 0xC0 19#define msk1 0x01 20#define msk2 0xF2 21#define msk3 0x03 22#define msk4 0x84 23#define msk5 0x05 24#define msk6 0xE6 25#define msk7 0x67 26 27static __m128i 28compute_mpsadbw (unsigned char *v1, unsigned char *v2, int mask) 29{ 30 union 31 { 32 __m128i x; 33 unsigned short s[8]; 34 } ret; 35 unsigned char s[4]; 36 int i, j; 37 int offs1, offs2; 38 39 offs2 = 4 * (mask & 3); 40 for (i = 0; i < 4; i++) 41 s[i] = v2[offs2 + i]; 42 43 offs1 = 4 * ((mask & 4) >> 2); 44 for (j = 0; j < 8; j++) 45 { 46 ret.s[j] = 0; 47 for (i = 0; i < 4; i++) 48 ret.s[j] += abs (v1[offs1 + j + i] - s[i]); 49 } 50 51 return ret.x; 52} 53 54static void 55TEST (void) 56{ 57 union 58 { 59 __m128i x; 60 unsigned int i[4]; 61 unsigned char c[16]; 62 } val1, val2, val3 [8]; 63 __m128i res[8], tmp; 64 unsigned char masks[8]; 65 int i; 66 67 val1.i[0] = 0x35251505; 68 val1.i[1] = 0x75655545; 69 val1.i[2] = 0xB5A59585; 70 val1.i[3] = 0xF5E5D5C5; 71 72 val2.i[0] = 0x31211101; 73 val2.i[1] = 0x71615141; 74 val2.i[2] = 0xB1A19181; 75 val2.i[3] = 0xF1E1D1C1; 76 77 for (i=0; i < 8; i++) 78 switch (i % 3) 79 { 80 case 1: 81 val3[i].i[0] = 0xF1E1D1C1; 82 val3[i].i[1] = 0xB1A19181; 83 val3[i].i[2] = 0x71615141; 84 val3[i].i[3] = 0x31211101; 85 break; 86 default: 87 val3[i].x = val2.x; 88 break; 89 } 90 91 /* Check mpsadbw imm8, xmm, xmm. */ 92 res[0] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk0); 93 res[1] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk1); 94 res[2] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk2); 95 res[3] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk3); 96 res[4] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk4); 97 res[5] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk5); 98 res[6] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk6); 99 res[7] = _mm_mpsadbw_epu8 (val1.x, val2.x, msk7); 100 101 masks[0] = msk0; 102 masks[1] = msk1; 103 masks[2] = msk2; 104 masks[3] = msk3; 105 masks[4] = msk4; 106 masks[5] = msk5; 107 masks[6] = msk6; 108 masks[7] = msk7; 109 110 for (i=0; i < 8; i++) 111 { 112 tmp = compute_mpsadbw (val1.c, val2.c, masks[i]); 113 if (memcmp (&tmp, &res[i], sizeof (tmp))) 114 abort (); 115 } 116 117 /* Check mpsadbw imm8, m128, xmm. */ 118 for (i=0; i < 8; i++) 119 { 120 res[i] = _mm_mpsadbw_epu8 (val1.x, val3[i].x, msk4); 121 masks[i] = msk4; 122 } 123 124 for (i=0; i < 8; i++) 125 { 126 tmp = compute_mpsadbw (val1.c, val3[i].c, masks[i]); 127 if (memcmp (&tmp, &res[i], sizeof (tmp))) 128 abort (); 129 } 130} 131