1/* { dg-do run } */ 2/* { dg-require-effective-target fma4 } */ 3/* { dg-options "-O0 -mfma4" } */ 4 5#include "fma4-check.h" 6 7#include <x86intrin.h> 8#include <string.h> 9 10#define NUM 20 11 12union 13{ 14 __m128 x[NUM]; 15 float f[NUM * 4]; 16 __m128d y[NUM]; 17 double d[NUM * 2]; 18} dst, res, src1, src2, src3; 19 20/* Note that in macc*,msub*,mnmacc* and mnsub* instructions, the intermdediate 21 product is not rounded, only the addition is rounded. */ 22 23static void 24init_msubps () 25{ 26 int i; 27 for (i = 0; i < NUM * 4; i++) 28 { 29 src1.f[i] = i; 30 src2.f[i] = i + 10; 31 src3.f[i] = i + 20; 32 } 33} 34 35static void 36init_msubpd () 37{ 38 int i; 39 for (i = 0; i < NUM * 4; i++) 40 { 41 src1.d[i] = i; 42 src2.d[i] = i + 10; 43 src3.d[i] = i + 20; 44 } 45} 46 47static int 48check_msubps () 49{ 50 int i, j, check_fails = 0; 51 for (i = 0; i < NUM * 4; i = i + 4) 52 for (j = 0; j < 4; j++) 53 { 54 res.f[i + j] = (src1.f[i + j] * src2.f[i + j]) - src3.f[i + j]; 55 if (dst.f[i + j] != res.f[i + j]) 56 check_fails++; 57 } 58 return check_fails++; 59} 60 61static int 62check_msubpd () 63{ 64 int i, j, check_fails = 0; 65 for (i = 0; i < NUM * 2; i = i + 2) 66 for (j = 0; j < 2; j++) 67 { 68 res.d[i + j] = (src1.d[i + j] * src2.d[i + j]) - src3.d[i + j]; 69 if (dst.d[i + j] != res.d[i + j]) 70 check_fails++; 71 } 72 return check_fails++; 73} 74 75 76static int 77check_msubss () 78{ 79 int i, j, check_fails = 0; 80 for (i = 0; i < NUM * 4; i = i + 4) 81 { 82 res.f[i] = (src1.f[i] * src2.f[i]) - src3.f[i]; 83 if (dst.f[i] != res.f[i]) 84 check_fails++; 85 } 86 return check_fails++; 87} 88 89static int 90check_msubsd () 91{ 92 int i, j, check_fails = 0; 93 for (i = 0; i < NUM * 2; i = i + 2) 94 { 95 res.d[i] = (src1.d[i] * src2.d[i]) - src3.d[i]; 96 if (dst.d[i] != res.d[i]) 97 check_fails++; 98 } 99 return check_fails++; 100} 101 102static void 103fma4_test (void) 104{ 105 int i; 106 107 init_msubps (); 108 109 for (i = 0; i < NUM; i++) 110 dst.x[i] = _mm_msub_ps (src1.x[i], src2.x[i], src3.x[i]); 111 112 if (check_msubps ()) 113 abort (); 114 115 for (i = 0; i < NUM; i++) 116 dst.x[i] = _mm_msub_ss (src1.x[i], src2.x[i], src3.x[i]); 117 118 if (check_msubss ()) 119 abort (); 120 121 init_msubpd (); 122 123 for (i = 0; i < NUM; i++) 124 dst.y[i] = _mm_msub_pd (src1.y[i], src2.y[i], src3.y[i]); 125 126 if (check_msubpd ()) 127 abort (); 128 129 for (i = 0; i < NUM; i++) 130 dst.y[i] = _mm_msub_sd (src1.y[i], src2.y[i], src3.y[i]); 131 132 if (check_msubsd ()) 133 abort (); 134} 135