1/* { dg-do run } */ 2/* { dg-require-effective-target fma4 } */ 3/* { dg-options "-O0 -mfma4" } */ 4 5#include "fma4-check.h" 6 7#include <x86intrin.h> 8#include <string.h> 9 10#define NUM 20 11 12union 13{ 14 __m128 x[NUM]; 15 float f[NUM * 4]; 16 __m128d y[NUM]; 17 double d[NUM * 2]; 18} dst, res, src1, src2, src3; 19 20 21/* Note that in macc*,msub*,mnmacc* and mnsub* instructions, the intermdediate 22 product is not rounded, only the addition is rounded. */ 23 24static void 25init_maccps () 26{ 27 int i; 28 for (i = 0; i < NUM * 4; i++) 29 { 30 src1.f[i] = i; 31 src2.f[i] = i + 10; 32 src3.f[i] = i + 20; 33 } 34} 35 36static void 37init_maccpd () 38{ 39 int i; 40 for (i = 0; i < NUM * 4; i++) 41 { 42 src1.d[i] = i; 43 src2.d[i] = i + 10; 44 src3.d[i] = i + 20; 45 } 46} 47 48static int 49check_maccps () 50{ 51 int i, j, check_fails = 0; 52 for (i = 0; i < NUM * 4; i = i + 4) 53 for (j = 0; j < 4; j++) 54 { 55 res.f[i + j] = (src1.f[i + j] * src2.f[i + j]) + src3.f[i + j]; 56 if (dst.f[i + j] != res.f[i + j]) 57 check_fails++; 58 } 59 return check_fails++; 60} 61 62static int 63check_maccpd () 64{ 65 int i, j, check_fails = 0; 66 for (i = 0; i < NUM * 2; i = i + 2) 67 for (j = 0; j < 2; j++) 68 { 69 res.d[i + j] = (src1.d[i + j] * src2.d[i + j]) + src3.d[i + j]; 70 if (dst.d[i + j] != res.d[i + j]) 71 check_fails++; 72 } 73 return check_fails++; 74} 75 76 77static int 78check_maccss () 79{ 80 int i, j, check_fails = 0; 81 for (i = 0; i < NUM * 4; i= i + 4) 82 { 83 res.f[i] = (src1.f[i] * src2.f[i]) + src3.f[i]; 84 if (dst.f[i] != res.f[i]) 85 check_fails++; 86 } 87 return check_fails++; 88} 89 90static int 91check_maccsd () 92{ 93 int i, j, check_fails = 0; 94 for (i = 0; i < NUM * 2; i = i + 2) 95 { 96 res.d[i] = (src1.d[i] * src2.d[i]) + src3.d[i]; 97 if (dst.d[i] != res.d[i]) 98 check_fails++; 99 } 100 return check_fails++; 101} 102 103static void 104fma4_test (void) 105{ 106 int i; 107 108 init_maccps (); 109 110 for (i = 0; i < NUM; i++) 111 dst.x[i] = _mm_macc_ps (src1.x[i], src2.x[i], src3.x[i]); 112 113 if (check_maccps ()) 114 abort (); 115 116 for (i = 0; i < NUM; i++) 117 dst.x[i] = _mm_macc_ss (src1.x[i], src2.x[i], src3.x[i]); 118 119 if (check_maccss ()) 120 abort (); 121 122 init_maccpd (); 123 124 for (i = 0; i < NUM; i++) 125 dst.y[i] = _mm_macc_pd (src1.y[i], src2.y[i], src3.y[i]); 126 127 if (check_maccpd ()) 128 abort (); 129 130 for (i = 0; i < NUM; i++) 131 dst.y[i] = _mm_macc_sd (src1.y[i], src2.y[i], src3.y[i]); 132 133 if (check_maccsd ()) 134 abort (); 135 136} 137