1/* { dg-do run } */ 2/* { dg-require-effective-target fma4 } */ 3/* { dg-options "-O0 -mfma4" } */ 4 5#include "fma4-check.h" 6 7#include <x86intrin.h> 8#include <string.h> 9 10#define NUM 20 11 12union 13{ 14 __m128 x[NUM]; 15 float f[NUM * 4]; 16 __m128d y[NUM]; 17 double d[NUM * 2]; 18} dst, res, src1, src2, src3; 19 20/* Note that in macc*,msub*,mnmacc* and mnsub* instructions, the intermdediate 21 product is not rounded, only the addition is rounded. */ 22 23static void 24init_nmsubps () 25{ 26 int i; 27 for (i = 0; i < NUM * 4; i++) 28 { 29 src1.f[i] = i; 30 src2.f[i] = i + 10; 31 src3.f[i] = i + 20; 32 } 33} 34 35static void 36init_nmsubpd () 37{ 38 int i; 39 for (i = 0; i < NUM * 4; i++) 40 { 41 src1.d[i] = i; 42 src2.d[i] = i + 10; 43 src3.d[i] = i + 20; 44 } 45} 46 47static int 48check_nmsubps () 49{ 50 int i, j, check_fails = 0; 51 for (i = 0; i < NUM * 4; i = i + 4) 52 for (j = 0; j < 4; j++) 53 { 54 res.f[i + j] = - (src1.f[i + j] * src2.f[i + j]) - src3.f[i + j]; 55 if (dst.f[i + j] != res.f[i + j]) 56 check_fails++; 57 } 58 return check_fails++; 59} 60 61static int 62check_nmsubpd () 63{ 64 int i, j, check_fails = 0; 65 for (i = 0; i < NUM * 2; i = i + 2) 66 for (j = 0; j < 2; j++) 67 { 68 res.d[i + j] = - (src1.d[i + j] * src2.d[i + j]) - src3.d[i + j]; 69 if (dst.d[i + j] != res.d[i + j]) 70 check_fails++; 71 } 72 return check_fails++; 73} 74 75 76static int 77check_nmsubss () 78{ 79 int i, j, check_fails = 0; 80 for (i = 0; i < NUM * 4; i = i + 4) 81 { 82 res.f[i] = - (src1.f[i] * src2.f[i]) - src3.f[i]; 83 if (dst.f[i] != res.f[i]) 84 check_fails++; 85 } 86 return check_fails++; 87} 88 89static int 90check_nmsubsd () 91{ 92 int i, j, check_fails = 0; 93 for (i = 0; i < NUM * 2; i = i + 2) 94 { 95 res.d[i] = - (src1.d[i] * src2.d[i]) - src3.d[i]; 96 if (dst.d[i] != res.d[i]) 97 check_fails++; 98 } 99 return check_fails++; 100} 101 102static void 103fma4_test (void) 104{ 105 int i; 106 107 init_nmsubps (); 108 109 for (i = 0; i < NUM; i++) 110 dst.x[i] = _mm_nmsub_ps (src1.x[i], src2.x[i], src3.x[i]); 111 112 if (check_nmsubps (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4], &src3.f[i * 4])) 113 abort (); 114 115 116 for (i = 0; i < NUM; i++) 117 dst.x[i] = _mm_nmsub_ss (src1.x[i], src2.x[i], src3.x[i]); 118 119 if (check_nmsubss (&dst.x[i], &src1.f[i * 4], &src2.f[i * 4], &src3.f[i * 4])) 120 abort (); 121 122 init_nmsubpd (); 123 124 for (i = 0; i < NUM; i++) 125 dst.y[i] = _mm_nmsub_pd (src1.y[i], src2.y[i], src3.y[i]); 126 127 if (check_nmsubpd (&dst.y[i], &src1.d[i * 2], &src2.d[i * 2], &src3.d[i * 2])) 128 abort (); 129 130 131 for (i = 0; i < NUM; i++) 132 dst.y[i] = _mm_nmsub_sd (src1.y[i], src2.y[i], src3.y[i]); 133 134 if (check_nmsubsd (&dst.y[i], &src1.d[i * 2], &src2.d[i * 2], &src3.d[i * 2])) 135 abort (); 136 137} 138