/* Test that the compiler properly vectorizes floating point multiply and
   add loops into vfmaddps (and the related packed FMA4 instructions
   scanned for at the end of this file) on FMA4 systems.  */

/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -mfma4 -ftree-vectorize" } */

/* Declared by hand so the test does not depend on any system header.  */
extern void exit (int);

/* 32-byte vectors of 8 floats / 4 doubles.  */
typedef float __m256 __attribute__ ((__vector_size__ (32), __may_alias__));
typedef double __m256d __attribute__ ((__vector_size__ (32), __may_alias__));

/* Number of elements processed by every loop below.  */
#define SIZE 10240

/* Global operands: a is the destination; b, c and d are the sources.
   Each union exposes the same storage as a float array and as a double
   array.  The vector members are never accessed by the code in this
   file; presumably they exist only to give the unions 32-byte
   alignment.  */
union {
  __m256 f_align;
  __m256d d_align;
  float f[SIZE];
  double d[SIZE];
} a, b, c, d;

/* Single precision a = (b * c) + d; intended to match the vfmaddps
   scan.  */
void
flt_mul_add (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.f[i] = (b.f[i] * c.f[i]) + d.f[i];
}

/* Double precision a = (b * c) + d; intended to match the vfmaddpd
   scan.  */
void
dbl_mul_add (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.d[i] = (b.d[i] * c.d[i]) + d.d[i];
}

/* Single precision a = (b * c) - d; intended to match the vfmsubps
   scan.  */
void
flt_mul_sub (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.f[i] = (b.f[i] * c.f[i]) - d.f[i];
}

/* Double precision a = (b * c) - d; intended to match the vfmsubpd
   scan.  */
void
dbl_mul_sub (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.d[i] = (b.d[i] * c.d[i]) - d.d[i];
}

/* Single precision a = -(b * c) + d; intended to match the vfnmaddps
   scan.  */
void
flt_neg_mul_add (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.f[i] = (-(b.f[i] * c.f[i])) + d.f[i];
}

/* Double precision a = -(b * c) + d; intended to match the vfnmaddpd
   scan.  */
void
dbl_neg_mul_add (void)
{
  int i;

  for (i = 0; i < SIZE; i++)
    a.d[i] = (-(b.d[i] * c.d[i])) + d.d[i];
}

/* Call every kernel once, single precision first, then double
   precision.  The test is compile-only (see the dg-do line above); the
   checks are the assembler scans that follow.  */
int main (void)
{
  flt_mul_add ();
  flt_mul_sub ();
  flt_neg_mul_add ();

  dbl_mul_add ();
  dbl_mul_sub ();
  dbl_neg_mul_add ();
  exit (0);
}

/* { dg-final { scan-assembler "vfmaddps" } } */
/* { dg-final { scan-assembler "vfmaddpd" } } */
/* { dg-final { scan-assembler "vfmsubps" } } */
/* { dg-final { scan-assembler "vfmsubpd" } } */
/* { dg-final { scan-assembler "vfnmaddps" } } */
/* { dg-final { scan-assembler "vfnmaddpd" } } */