1166144Smarius/* { dg-do run } */ 2166144Smarius/* { dg-require-effective-target fma4 } */ 3166144Smarius/* { dg-options "-O0 -mfma4" } */ 4166144Smarius 5166144Smarius#include "fma4-check.h" 6166144Smarius 7166144Smarius#include <x86intrin.h> 8166144Smarius#include <string.h> 9166144Smarius 10166144Smarius#define NUM 20 11166144Smarius 12166144Smariusunion 13166144Smarius{ 14166144Smarius __m128 x[NUM]; 15166144Smarius float f[NUM * 4]; 16166144Smarius __m128d y[NUM]; 17166144Smarius double d[NUM * 2]; 18166144Smarius} dst, res, src1, src2, src3; 19166144Smarius 20166144Smarius/* Note that in macc*,msub*,mnmacc* and mnsub* instructions, the intermdediate 21166144Smarius product is not rounded, only the addition is rounded. */ 22166144Smarius 23166144Smariusstatic void 24166144Smariusinit_nmaccps () 25166144Smarius{ 26166144Smarius int i; 27166144Smarius for (i = 0; i < NUM * 4; i++) 28166144Smarius { 29166144Smarius src1.f[i] = i; 30166144Smarius src2.f[i] = i + 10; 31166144Smarius src3.f[i] = i + 20; 32166144Smarius } 33166144Smarius} 34166144Smarius 35166144Smariusstatic void 36166144Smariusinit_nmaccpd () 37166144Smarius{ 38166144Smarius int i; 39166144Smarius for (i = 0; i < NUM * 4; i++) 40166144Smarius { 41166144Smarius src1.d[i] = i; 42166144Smarius src2.d[i] = i + 10; 43166144Smarius src3.d[i] = i + 20; 44166144Smarius } 45166144Smarius} 46166144Smarius 47166144Smariusstatic int 48166144Smariuscheck_nmaccps () 49166144Smarius{ 50166144Smarius int i, j, check_fails = 0; 51166144Smarius for (i = 0; i < NUM * 4; i = i + 4) 52166144Smarius for (j = 0; j < 4; j++) 53166144Smarius { 54166144Smarius res.f[i + j] = - (src1.f[i + j] * src2.f[i + j]) + src3.f[i + j]; 55166144Smarius if (dst.f[i + j] != res.f[i + j]) 56166144Smarius check_fails++; 57166144Smarius } 58166144Smarius return check_fails++; 59166144Smarius} 60166144Smarius 61166144Smariusstatic int 62166144Smariuscheck_nmaccpd () 63166144Smarius{ 64166144Smarius int i, j, check_fails = 0; 65166144Smarius for (i = 0; i < NUM * 2; i = i + 2) 66166144Smarius for (j = 0; j < 2; j++) 67166144Smarius { 68166144Smarius res.d[i + j] = - (src1.d[i + j] * src2.d[i + j]) + src3.d[i + j]; 69166144Smarius if (dst.d[i + j] != res.d[i + j]) 70166144Smarius check_fails++; 71166144Smarius } 72166144Smarius return check_fails++; 73166144Smarius} 74166144Smarius 75166144Smarius 76166144Smariusstatic int 77166144Smariuscheck_nmaccss () 78166144Smarius{ 79166144Smarius int i, j, check_fails = 0; 80166144Smarius for (i = 0; i < NUM * 4; i = i + 4) 81166144Smarius { 82166144Smarius res.f[i] = - (src1.f[i] * src2.f[i]) + src3.f[i]; 83166144Smarius if (dst.f[i] != res.f[i]) 84225931Smarius check_fails++; 85166144Smarius } 86166144Smarius return check_fails++; 87166144Smarius} 88166144Smarius 89166144Smariusstatic int 90166144Smariuscheck_nmaccsd () 91166144Smarius{ 92166144Smarius int i, j, check_fails = 0; 93166144Smarius for (i = 0; i < NUM * 2; i = i + 2) 94166144Smarius { 95166144Smarius res.d[i] = - (src1.d[i] * src2.d[i]) + src3.d[i]; 96166144Smarius if (dst.d[i] != res.d[i]) 97166144Smarius check_fails++; 98166144Smarius } 99166144Smarius return check_fails++; 100166144Smarius} 101166144Smarius 102166144Smariusstatic void 103166144Smariusfma4_test (void) 104183337Smarius{ 105183337Smarius int i; 106166144Smarius 107166144Smarius init_nmaccps (); 108166144Smarius 109166144Smarius for (i = 0; i < NUM; i++) 110166144Smarius dst.x[i] = _mm_nmacc_ps (src1.x[i], src2.x[i], src3.x[i]); 111166144Smarius 112166144Smarius if (check_nmaccps ()) 113166144Smarius abort (); 114166144Smarius 115166144Smarius 116166144Smarius for (i = 0; i < NUM; i++) 117166144Smarius dst.x[i] = _mm_nmacc_ss (src1.x[i], src2.x[i], src3.x[i]); 118166144Smarius 119166144Smarius if (check_nmaccss ()) 120166144Smarius abort (); 121166144Smarius 122166144Smarius init_nmaccpd (); 123166144Smarius 124166144Smarius for (i = 0; i < NUM; i++) 125166144Smarius dst.y[i] = _mm_nmacc_pd (src1.y[i], src2.y[i], src3.y[i]); 126166144Smarius 127166144Smarius if (check_nmaccpd ()) 128166144Smarius abort (); 129166144Smarius 130166144Smarius 131166144Smarius for (i = 0; i < NUM; i++) 132166144Smarius dst.y[i] = _mm_nmacc_sd (src1.y[i], src2.y[i], src3.y[i]); 133166144Smarius 134166144Smarius if (check_nmaccsd ()) 135166144Smarius abort (); 136166144Smarius 137166144Smarius} 138166144Smarius