/* Test that the compiler properly optimizes vectorized floating point
   multiply and add instructions into vfmaddps (and the related
   vfmaddpd/vfmsub/vfnmadd forms) on FMA4 systems.  */

/* { dg-do compile { target { ! { ia32 } } } } */
/* { dg-options "-O2 -mfma4 -ftree-vectorize -mtune=generic" } */

/* Declared by hand so the test does not depend on any system header.  */
extern void exit (int);

/* 32-byte (256-bit) vector types: 8 floats and 4 doubles respectively.  */
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
typedef double __m256d __attribute__ ((__vector_size__ (32), __may_alias__));

/* Number of elements processed by every loop below.  */
#define SIZE 10240

/* Operands and result of the loops.  Each object overlays a float array
   and a double array; the f_align/d_align vector members are never
   accessed and are present (per their names) to give the arrays the
   alignment of a 256-bit vector.  */
union {
  __m256 f_align;
  __m256d d_align;
  float f[SIZE];
  double d[SIZE];
} a, b, c, d;

/* a.f[i] = b.f[i] * c.f[i] + d.f[i]: single-precision multiply-add,
   the loop intended to satisfy the vfmaddps scan at the end of the file.  */
void
flt_mul_add (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.f[i] = (b.f[i] * c.f[i]) + d.f[i];
}

/* a.d[i] = b.d[i] * c.d[i] + d.d[i]: double-precision multiply-add,
   the loop intended to satisfy the vfmaddpd scan.  */
void
dbl_mul_add (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.d[i] = (b.d[i] * c.d[i]) + d.d[i];
}

/* a.f[i] = b.f[i] * c.f[i] - d.f[i]: single-precision multiply-subtract,
   the loop intended to satisfy the vfmsubps scan.  */
void
flt_mul_sub (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.f[i] = (b.f[i] * c.f[i]) - d.f[i];
}

/* a.d[i] = b.d[i] * c.d[i] - d.d[i]: double-precision multiply-subtract,
   the loop intended to satisfy the vfmsubpd scan.  */
void
dbl_mul_sub (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.d[i] = (b.d[i] * c.d[i]) - d.d[i];
}

/* a.f[i] = -(b.f[i] * c.f[i]) + d.f[i]: single-precision negated
   multiply-add, the loop intended to satisfy the vfnmaddps scan.  */
void
flt_neg_mul_add (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.f[i] = (-(b.f[i] * c.f[i])) + d.f[i];
}

/* a.d[i] = -(b.d[i] * c.d[i]) + d.d[i]: double-precision negated
   multiply-add, the loop intended to satisfy the vfnmaddpd scan.  */
void
dbl_neg_mul_add (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.d[i] = (-(b.d[i] * c.d[i])) + d.d[i];
}

/* Call every kernel once so each one is referenced.  The test is
   compile-only; what is checked is the generated assembly, via the
   scan-assembler directives below.  */
int main (void)
{
  flt_mul_add ();
  flt_mul_sub ();
  flt_neg_mul_add ();

  dbl_mul_add ();
  dbl_mul_sub ();
  dbl_neg_mul_add ();
  exit (0);
}

/* { dg-final { scan-assembler "vfmaddps" } } */
/* { dg-final { scan-assembler "vfmaddpd" } } */
/* { dg-final { scan-assembler "vfmsubps" } } */
/* { dg-final { scan-assembler "vfmsubpd" } } */
/* { dg-final { scan-assembler "vfnmaddps" } } */
/* { dg-final { scan-assembler "vfnmaddpd" } } */