1/* { dg-do run } */ 2/* { dg-require-effective-target fma4 } */ 3/* { dg-options "-O2 -mfma4" } */ 4 5#include "fma4-check.h" 6 7#include <x86intrin.h> 8#include <string.h> 9 10#define NUM 20 11 12union 13{ 14 __m256 x[NUM]; 15 float f[NUM * 8]; 16 __m256d y[NUM]; 17 double d[NUM * 4]; 18} dst, res, src1, src2, src3; 19 20/* Note that in macc*,msub*,mnmacc* and mnsub* instructions, the intermdediate 21 product is not rounded, only the addition is rounded. */ 22 23static void 24init_nmaccps () 25{ 26 int i; 27 for (i = 0; i < NUM * 8; i++) 28 { 29 src1.f[i] = i; 30 src2.f[i] = i + 10; 31 src3.f[i] = i + 20; 32 } 33} 34 35static void 36init_nmaccpd () 37{ 38 int i; 39 for (i = 0; i < NUM * 4; i++) 40 { 41 src1.d[i] = i; 42 src2.d[i] = i + 10; 43 src3.d[i] = i + 20; 44 } 45} 46 47static int 48check_nmaccps () 49{ 50 int i, j, check_fails = 0; 51 for (i = 0; i < NUM * 8; i = i + 8) 52 for (j = 0; j < 8; j++) 53 { 54 res.f[i + j] = - (src1.f[i + j] * src2.f[i + j]) + src3.f[i + j]; 55 if (dst.f[i + j] != res.f[i + j]) 56 check_fails++; 57 } 58 return check_fails++; 59} 60 61static int 62check_nmaccpd () 63{ 64 int i, j, check_fails = 0; 65 for (i = 0; i < NUM * 4; i = i + 4) 66 for (j = 0; j < 4; j++) 67 { 68 res.d[i + j] = - (src1.d[i + j] * src2.d[i + j]) + src3.d[i + j]; 69 if (dst.d[i + j] != res.d[i + j]) 70 check_fails++; 71 } 72 return check_fails++; 73} 74 75static void 76fma4_test (void) 77{ 78 int i; 79 80 init_nmaccps (); 81 82 for (i = 0; i < NUM; i++) 83 dst.x[i] = _mm256_nmacc_ps (src1.x[i], src2.x[i], src3.x[i]); 84 85 if (check_nmaccps ()) 86 abort (); 87 88 init_nmaccpd (); 89 90 for (i = 0; i < NUM; i++) 91 dst.y[i] = _mm256_nmacc_pd (src1.y[i], src2.y[i], src3.y[i]); 92 93 if (check_nmaccpd ()) 94 abort (); 95 96} 97